diff --git a/demos/dbbrowse/makefile.gtk b/demos/dbbrowse/makefile.gtk
deleted file mode 100644
index cd8a960812..0000000000
--- a/demos/dbbrowse/makefile.gtk
+++ /dev/null
@@ -1,15 +0,0 @@
-
-# Top dir of wxWindows
-top_builddir = /gtm/bart/wxGTK
-
-PROGRAM=dbbrowser_gtk
-
-
-OBJECTS= dbbrowse.o doc.o pgmctrl.o tabpgwin.o\
- browsedb.o dbtree.o dbgrid.o dlguser.o
-
-
-
-
-include $(top_builddir)/src/makeprog.env
-
diff --git a/src/html/htmlfilter.cpp b/src/html/htmlfilter.cpp
deleted file mode 100644
index 57508b1069..0000000000
--- a/src/html/htmlfilter.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-// Name: filter.cpp
-// Purpose: wxHtmlFilter - input filter for translating into HTML format
-// Author: Vaclav Slavik
-// Copyright: (c) 1999 Vaclav Slavik
-// Licence: wxWindows Licence
-/////////////////////////////////////////////////////////////////////////////
-
-
-#ifdef __GNUG__
-#pragma implementation "htmlfilter.h"
-#endif
-
-#include "wx/wxprec.h"
-
-#if wxUSE_HTML
-
-#ifdef __BORDLANDC__
-#pragma hdrstop
-#endif
-
-#ifndef WXPRECOMP
-#endif
-
-#include "wx/html/htmlfilter.h"
-#include "wx/html/htmlwin.h"
-
-
-/*
-
-There is code for several default filters:
-
-*/
-
-IMPLEMENT_ABSTRACT_CLASS(wxHtmlFilter, wxObject)
-
-//--------------------------------------------------------------------------------
-// wxHtmlFilterPlainText
-// filter for text/plain or uknown
-//--------------------------------------------------------------------------------
-
-IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterPlainText, wxHtmlFilter)
-
-bool wxHtmlFilterPlainText::CanRead(const wxFSFile& WXUNUSED(file)) const
-{
- return TRUE;
-}
-
-
-
-wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
-{
- wxInputStream *s = file.GetStream();
- char *src;
- wxString doc, doc2;
-
- if (s == NULL) return wxEmptyString;
- src = new char[s -> GetSize()+1];
- src[s -> GetSize()] = 0;
- s -> Read(src, s -> GetSize());
- doc = src;
- delete [] src;
-
- doc.Replace(_T("<"), _T("<"), TRUE);
- doc.Replace(_T(">"), _T(">"), TRUE);
- doc2 = _T("
\n") + doc + _T("\n
");
- return doc2;
-}
-
-
-
-
-
-//--------------------------------------------------------------------------------
-// wxHtmlFilterImage
-// filter for image/*
-//--------------------------------------------------------------------------------
-
-class wxHtmlFilterImage : public wxHtmlFilter
-{
- DECLARE_DYNAMIC_CLASS(wxHtmlFilterImage)
-
- public:
- virtual bool CanRead(const wxFSFile& file) const;
- virtual wxString ReadFile(const wxFSFile& file) const;
-};
-
-IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterImage, wxHtmlFilter)
-
-
-
-bool wxHtmlFilterImage::CanRead(const wxFSFile& file) const
-{
- return (file.GetMimeType().Left(6) == "image/");
-}
-
-
-
-wxString wxHtmlFilterImage::ReadFile(const wxFSFile& file) const
-{
- return ("
");
-}
-
-
-
-
-//--------------------------------------------------------------------------------
-// wxHtmlFilterPlainText
-// filter for text/plain or uknown
-//--------------------------------------------------------------------------------
-
-class wxHtmlFilterHTML : public wxHtmlFilter
-{
- DECLARE_DYNAMIC_CLASS(wxHtmlFilterHTML)
-
- public:
- virtual bool CanRead(const wxFSFile& file) const;
- virtual wxString ReadFile(const wxFSFile& file) const;
-};
-
-
-IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterHTML, wxHtmlFilter)
-
-bool wxHtmlFilterHTML::CanRead(const wxFSFile& file) const
-{
-// return (file.GetMimeType() == "text/html");
-// This is true in most case but some page can return:
-// "text/html; char-encoding=...."
-// So we use Find instead
- return (file.GetMimeType().Find(_T("text/html")) == 0);
-}
-
-
-
-wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
-{
- wxInputStream *s = file.GetStream();
- char *src;
- wxString doc;
-
- if (s == NULL) return wxEmptyString;
- src = new char[s -> GetSize() + 1];
- src[s -> GetSize()] = 0;
- s -> Read(src, s -> GetSize());
- doc = src;
- delete[] src;
-
- return doc;
-}
-
-
-
-
-///// Module:
-
-class wxHtmlFilterModule : public wxModule
-{
- DECLARE_DYNAMIC_CLASS(wxHtmlFilterModule)
-
- public:
- virtual bool OnInit()
- {
- wxHtmlWindow::AddFilter(new wxHtmlFilterHTML);
- wxHtmlWindow::AddFilter(new wxHtmlFilterImage);
- return TRUE;
- }
- virtual void OnExit() {}
-};
-
-IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterModule, wxModule)
-
-#endif
diff --git a/src/html/htmlhelp.cpp b/src/html/htmlhelp.cpp
deleted file mode 100644
index 8ea6466640..0000000000
--- a/src/html/htmlhelp.cpp
+++ /dev/null
@@ -1,839 +0,0 @@
-// Name: htmlhelp.cpp
-// Purpose: Help controller
-// Author: Vaclav Slavik
-// Copyright: (c) 1999 Vaclav Slavik
-// Licence: wxWindows Licence
-/////////////////////////////////////////////////////////////////////////////
-
-#error This file should not be compiled! Update your build system! \
-(configure users, rerun configure to get a new Makefile) \
-Instead of htmlhelp[_io], use helpdata, helpfrm and helpctrl. This \
-file is only left to point out the problem and will be removed r.s.n.
-
-#ifdef __GNUG__
-#pragma implementation "htmlhelp.h"
-#endif
-
-#include "wx/wxprec.h"
-
-#if wxUSE_HTML
-
-#ifdef __BORDLANDC__
-#pragma hdrstop
-#endif
-
-#ifndef WXPRECOMP
-#endif
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#if !((wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7)))
-#include
-#endif
-
-
-// Bitmaps:
-
-#ifndef __WXMSW__
- // XPM hack: make the arrays const
- #define static static const
-
- #include "bitmaps/panel.xpm"
- #include "bitmaps/back.xpm"
- #include "bitmaps/forward.xpm"
- #include "bitmaps/book.xpm"
- #include "bitmaps/folder.xpm"
- #include "bitmaps/page.xpm"
-
- #undef static
-#endif
-
-#include "search.h"
-
-
-
-
-#include
-WX_DEFINE_OBJARRAY(HtmlBookRecArray)
-
-
-
-
-
-
-
-
-
-//-----------------------------------------------------------------------------
-// wxHtmlHelpController
-//-----------------------------------------------------------------------------
-
-
-IMPLEMENT_DYNAMIC_CLASS(wxHtmlHelpController, wxEvtHandler)
-
-
-wxHtmlHelpController::wxHtmlHelpController() : wxEvtHandler()
-{
- m_Frame = NULL;
- m_Config = NULL;
- m_ConfigRoot = wxEmptyString;
- m_TitleFormat = _("Help : %s");
- m_TempPath = wxEmptyString;
-
- m_Cfg.x = m_Cfg.y = 0;
- m_Cfg.w = 700; m_Cfg.h = 480;
- m_Cfg.sashpos = 240;
- m_Cfg.navig_on = TRUE;
-
- m_ContentsImageList = new wxImageList(12, 12);
- m_ContentsImageList -> Add(wxICON(book));
- m_ContentsImageList -> Add(wxICON(folder));
- m_ContentsImageList -> Add(wxICON(page));
-
- m_Contents = NULL;
- m_ContentsCnt = 0;
- m_Index = NULL;
- m_IndexCnt = 0;
-
- m_IndexBox = NULL;
- m_ContentsBox = NULL;
- m_SearchList = NULL;
- m_SearchText = NULL;
- m_SearchButton = NULL;
- m_HtmlWin = NULL;
- m_Splitter = NULL;
- m_NavigPan = NULL;
-}
-
-
-
-wxHtmlHelpController::~wxHtmlHelpController()
-{
- int i;
-
- m_BookRecords.Empty();
- delete m_ContentsImageList;
- if (m_Contents) {
- for (i = 0; i < m_ContentsCnt; i++) {
- delete[] m_Contents[i].m_Page;
- delete[] m_Contents[i].m_Name;
- }
- free(m_Contents);
- }
- if (m_Index) {
- for (i = 0; i < m_IndexCnt; i++) {
- delete[] m_Index[i].m_Page;
- delete[] m_Index[i].m_Name;
- }
- free(m_Index);
- }
-}
-
-
-
-void wxHtmlHelpController::SetTempDir(const wxString& path)
-{
- if (path == wxEmptyString) m_TempPath = path;
- else {
- if (wxIsAbsolutePath(path)) m_TempPath = path;
- else m_TempPath = wxGetCwd() + "/" + path;
-
- if (m_TempPath[m_TempPath.Length() - 1] != '/')
- m_TempPath << "/";
- }
-}
-
-
-
-
-// Reads one line, stores it into buf and returns pointer to new line or NULL.
-static char* ReadLine(char *line, char *buf)
-{
- char *writeptr = buf, *readptr = line;
-
- while (*readptr != 0 && *readptr != '\r' && *readptr != '\n') *(writeptr++) = *(readptr++);
- *writeptr = 0;
- while (*readptr == '\r' || *readptr == '\n') readptr++;
- if (*readptr == 0) return NULL;
- else return readptr;
-}
-
-
-static wxString SafeFileName(const wxString& s)
-{
- wxString res = s;
- res.Replace(_T(":"), _T("_"), TRUE);
- res.Replace(_T(" "), _T("_"), TRUE);
- res.Replace(_T("/"), _T("_"), TRUE);
- res.Replace(_T("\\"), _T("_"), TRUE);
- res.Replace(_T("#"), _T("_"), TRUE);
- res.Replace(_T("."), _T("_"), TRUE);
- return res;
-}
-
-
-static int IndexCompareFunc(const void *a, const void *b)
-{
- return strcmp(((HtmlContentsItem*)a) -> m_Name, ((HtmlContentsItem*)b) -> m_Name);
-}
-
-
-
-bool wxHtmlHelpController::AddBook(const wxString& book, bool show_wait_msg)
-{
- wxFSFile *fi;
- wxFileSystem fsys;
- wxInputStream *s;
- HtmlBookRecord *bookr;
- wxString bookFull;
-
- int sz;
- char *buff, *lineptr;
- char linebuf[300];
-
- wxString title = _("noname"),
- safetitle,
- start = wxEmptyString,
- contents = wxEmptyString, index = wxEmptyString;
-
- if (wxIsAbsolutePath(book)) bookFull = book;
- else bookFull = wxGetCwd() + "/" + book;
-
- fi = fsys.OpenFile(bookFull);
- if (fi == NULL) return FALSE;
- fsys.ChangePathTo(bookFull);
- s = fi -> GetStream();
- sz = s -> GetSize();
- buff = new char[sz+1];
- buff[sz] = 0;
- s -> Read(buff, sz);
- lineptr = buff;
- delete fi;
-
- while ((lineptr = ReadLine(lineptr, linebuf)) != NULL) {
- if (strstr(linebuf, "Title=") == linebuf)
- title = linebuf + strlen("Title=");
- if (strstr(linebuf, "Default topic=") == linebuf)
- start = linebuf + strlen("Default topic=");
- if (strstr(linebuf, "Index file=") == linebuf)
- index = linebuf + strlen("Index file=");
- if (strstr(linebuf, "Contents file=") == linebuf)
- contents = linebuf + strlen("Contents file=");
- }
- delete[] buff;
-
- bookr = new HtmlBookRecord(fsys.GetPath(), title, start);
-
- if (m_ContentsCnt % HTML_REALLOC_STEP == 0)
- m_Contents = (HtmlContentsItem*) realloc(m_Contents, (m_ContentsCnt + HTML_REALLOC_STEP) * sizeof(HtmlContentsItem));
- m_Contents[m_ContentsCnt].m_Level = 0;
- m_Contents[m_ContentsCnt].m_ID = 0;
- m_Contents[m_ContentsCnt].m_Page = new char[start.Length() + 1];
- strcpy(m_Contents[m_ContentsCnt].m_Page, start.c_str());
- m_Contents[m_ContentsCnt].m_Name = new char [title.Length() + 1];
- strcpy(m_Contents[m_ContentsCnt].m_Name, title.c_str());
- m_Contents[m_ContentsCnt].m_Book = bookr;
- m_ContentsCnt++;
-
- // Try to find cached binary versions:
- safetitle = SafeFileName(title);
- fi = fsys.OpenFile(safetitle + ".cached");
- if (fi == NULL) fi = fsys.OpenFile(m_TempPath + safetitle + ".cached");
- if ((fi == NULL) || (m_TempPath == wxEmptyString)) {
- LoadMSProject(bookr, fsys, index, contents, show_wait_msg);
- if (m_TempPath != wxEmptyString) {
- wxFileOutputStream *outs = new wxFileOutputStream(m_TempPath + safetitle + ".cached");
- SaveCachedBook(bookr, outs);
- delete outs;
- }
- }
- else {
- LoadCachedBook(bookr, fi -> GetStream());
- delete fi;
- }
-
- m_BookRecords.Add(bookr);
- if (m_IndexCnt > 0)
- qsort(m_Index, m_IndexCnt, sizeof(HtmlContentsItem), IndexCompareFunc);
-
- return TRUE;
-}
-
-
-
-
-void wxHtmlHelpController::Display(const wxString& x)
-{
- int cnt;
- int i;
- wxFileSystem fsys;
- wxFSFile *f;
-
- CreateHelpWindow();
-
- /* 1. try to open given file: */
-
- cnt = m_BookRecords.GetCount();
- for (i = 0; i < cnt; i++) {
- f = fsys.OpenFile(m_BookRecords[i].GetBasePath() + x);
- if (f) {
- m_HtmlWin -> LoadPage(m_BookRecords[i].GetBasePath() + x);
- delete f;
- return;
- }
- }
-
-
- /* 2. try to find a book: */
-
- for (i = 0; i < cnt; i++) {
- if (m_BookRecords[i].GetTitle() == x) {
- m_HtmlWin -> LoadPage(m_BookRecords[i].GetBasePath() + m_BookRecords[i].GetStart());
- return;
- }
- }
-
- /* 3. try to find in contents: */
-
- cnt = m_ContentsCnt;
- for (i = 0; i < cnt; i++) {
- if (strcmp(m_Contents[i].m_Name, x) == 0) {
- m_HtmlWin -> LoadPage(m_Contents[i].m_Book -> GetBasePath() + m_Contents[i].m_Page);
- return;
- }
- }
-
-
- /* 4. try to find in index: */
-
- cnt = m_IndexCnt;
- for (i = 0; i < cnt; i++) {
- if (strcmp(m_Index[i].m_Name, x) == 0) {
- m_HtmlWin -> LoadPage(m_Index[i].m_Book -> GetBasePath() + m_Index[i].m_Page);
- return;
- }
- }
-
-
- /* 5. if everything failed, search the documents: */
-
- KeywordSearch(x);
-}
-
-
-
-void wxHtmlHelpController::Display(const int id)
-{
- CreateHelpWindow();
-
- for (int i = 0; i < m_ContentsCnt; i++) {
- if (m_Contents[i].m_ID == id) {
- m_HtmlWin -> LoadPage(m_Contents[i].m_Book -> GetBasePath() + m_Contents[i].m_Page);
- return;
- }
- }
-}
-
-
-
-void wxHtmlHelpController::DisplayContents()
-{
- CreateHelpWindow();
- m_Frame -> Raise();
- if (!m_Splitter -> IsSplit()) {
- m_NavigPan -> Show(TRUE);
- m_HtmlWin -> Show(TRUE);
- m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos);
- }
- m_NavigPan -> SetSelection(0);
-}
-
-
-
-void wxHtmlHelpController::DisplayIndex()
-{
- CreateHelpWindow();
- m_Frame -> Raise();
- if (!m_Splitter -> IsSplit()) {
- m_NavigPan -> Show(TRUE);
- m_HtmlWin -> Show(TRUE);
- m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos);
- }
- m_NavigPan -> SetSelection(1);
-}
-
-
-
-
-#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7))
-
-class MyProgressDlg : public wxDialog
-{
- public:
- bool m_Canceled;
-
- MyProgressDlg(wxWindow *parent) : wxDialog(parent, -1,
- _("Searching..."),
- wxPoint(0, 0),
-#ifdef __WXGTK__
- wxSize(300, 110)
-#else
- wxSize(300, 130)
-#endif
- )
- {m_Canceled = FALSE;}
- void OnCancel(wxCommandEvent& event) {m_Canceled = TRUE;}
- DECLARE_EVENT_TABLE()
-};
-BEGIN_EVENT_TABLE(MyProgressDlg, wxDialog)
- EVT_BUTTON(wxID_CANCEL, MyProgressDlg::OnCancel)
-END_EVENT_TABLE()
-
-#endif
-
-
-bool wxHtmlHelpController::KeywordSearch(const wxString& keyword)
-{
- int foundcnt = 0;
- CreateHelpWindow();
- // if these are not set, we can't continue
- if (! (m_SearchList && m_HtmlWin))
- return FALSE;
- m_Frame -> Raise();
- if (m_Splitter && m_NavigPan && m_SearchButton) {
- if (!m_Splitter -> IsSplit()) {
- m_NavigPan -> Show(TRUE);
- m_HtmlWin -> Show(TRUE);
- m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos);
- }
- m_NavigPan -> SetSelection(2);
- m_SearchList -> Clear();
- m_SearchText -> SetValue(keyword);
- m_SearchButton -> Enable(FALSE);
- }
- {
- int cnt = m_ContentsCnt;
- wxSearchEngine engine;
- wxFileSystem fsys;
- wxFSFile *file;
- wxString lastpage = wxEmptyString;
- wxString foundstr;
-
-#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7))
- MyProgressDlg progress(m_Frame);
-
- wxStaticText *prompt = new wxStaticText(&progress, -1, "", wxPoint(20, 50), wxSize(260, 25), wxALIGN_CENTER);
- wxGauge *gauge = new wxGauge(&progress, -1, cnt, wxPoint(20, 20), wxSize(260, 25));
- wxButton *btn = new wxButton(&progress, wxID_CANCEL, _("Cancel"), wxPoint(110, 70), wxSize(80, 25));
- btn = btn; /* fool compiler :-) */
- prompt -> SetLabel(_("No matching page found yet"));
-
- progress.Centre(wxBOTH);
- progress.Show(TRUE);
-#else
- wxProgressDialog progress(_("Searching..."), _("No matching page found yet"), cnt, m_Frame, wxPD_APP_MODAL | wxPD_CAN_ABORT | wxPD_AUTO_HIDE);
-#endif
-
- engine.LookFor(keyword);
-
- for (int i = 0; i < cnt; i++) {
-#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7))
- gauge -> SetValue(i);
- if (progress.m_Canceled) break;
-#else
- if (progress.Update(i) == FALSE) break;
-#endif
- wxYield();
-
- file = fsys.OpenFile(m_Contents[i].m_Book -> GetBasePath() + m_Contents[i].m_Page);
- if (file) {
- if (lastpage != file -> GetLocation()) {
- lastpage = file -> GetLocation();
- if (engine.Scan(file -> GetStream())) {
- foundstr.Printf(_("Found %i matches"), ++foundcnt);
-#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7))
- prompt -> SetLabel(foundstr);
-#else
- progress.Update(i, foundstr);
-#endif
- wxYield();
- m_SearchList -> Append(m_Contents[i].m_Name, (char*)(m_Contents + i));
- }
- }
- delete file;
- }
- }
-
-#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7))
- progress.Close(TRUE);
-#endif
- }
- if (m_SearchButton)
- m_SearchButton -> Enable(TRUE);
- if (m_SearchText) {
- m_SearchText -> SetSelection(0, keyword.Length());
- m_SearchText -> SetFocus();
- }
- if (foundcnt) {
- HtmlContentsItem *it = (HtmlContentsItem*) m_SearchList -> GetClientData(0);
- if (it) m_HtmlWin -> LoadPage(it -> m_Book -> GetBasePath() + it -> m_Page);
- }
- return (foundcnt > 0);
-}
-
-
-
-
-
-
-void wxHtmlHelpController::CreateHelpWindow()
-{
- wxBusyCursor cur;
- wxString oldpath;
- wxStatusBar *sbar;
-
- if (m_Frame) {
- m_Frame -> Raise();
- m_Frame -> Show(TRUE);
- return;
- }
-
-#if wxUSE_BUSYINFO
- wxBusyInfo busyinfo(_("Preparing help window..."));
-#endif
-
- if (m_Config) ReadCustomization(m_Config, m_ConfigRoot);
-
- m_Frame = new wxFrame(NULL, -1, "", wxPoint(m_Cfg.x, m_Cfg.y), wxSize(m_Cfg.w, m_Cfg.h));
- m_Frame -> PushEventHandler(this);
- sbar = m_Frame -> CreateStatusBar();
-
- {
- wxToolBar *toolBar;
- toolBar = m_Frame -> CreateToolBar(wxNO_BORDER | wxTB_HORIZONTAL | wxTB_FLAT | wxTB_DOCKABLE);
- toolBar -> SetMargins(2, 2);
- wxBitmap* toolBarBitmaps[3];
-
-#ifdef __WXMSW__
- toolBarBitmaps[0] = new wxBitmap("panel");
- toolBarBitmaps[1] = new wxBitmap("back");
- toolBarBitmaps[2] = new wxBitmap("forward");
- int width = 24;
-#else
- toolBarBitmaps[0] = new wxBitmap(panel_xpm);
- toolBarBitmaps[1] = new wxBitmap(back_xpm);
- toolBarBitmaps[2] = new wxBitmap(forward_xpm);
- int width = 16;
-#endif
-
- int currentX = 5;
-
- toolBar -> AddTool(wxID_HTML_PANEL, *(toolBarBitmaps[0]), wxNullBitmap, FALSE, currentX, -1, (wxObject *) NULL, _("Show/hide navigation panel"));
- currentX += width + 5;
- toolBar -> AddSeparator();
- toolBar -> AddTool(wxID_HTML_BACK, *(toolBarBitmaps[1]), wxNullBitmap, FALSE, currentX, -1, (wxObject *) NULL, _("Go back to the previous HTML page"));
- currentX += width + 5;
- toolBar -> AddTool(wxID_HTML_FORWARD, *(toolBarBitmaps[2]), wxNullBitmap, FALSE, currentX, -1, (wxObject *) NULL, _("Go forward to the next HTML page"));
- currentX += width + 5;
-
- toolBar -> Realize();
-
- // Can delete the bitmaps since they're reference counted
- for (int i = 0; i < 3; i++) delete toolBarBitmaps[i];
- }
-
-
- {
- m_Splitter = new wxSplitterWindow(m_Frame);
-
- m_HtmlWin = new wxHtmlWindow(m_Splitter);
- m_HtmlWin -> SetRelatedFrame(m_Frame, m_TitleFormat);
- m_HtmlWin -> SetRelatedStatusBar(0);
- if (m_Config) m_HtmlWin -> ReadCustomization(m_Config, m_ConfigRoot);
-
- m_NavigPan = new wxNotebook(m_Splitter, wxID_HTML_NOTEBOOK, wxDefaultPosition, wxDefaultSize);
- {
- m_ContentsBox = new wxTreeCtrl(m_NavigPan, wxID_HTML_TREECTRL, wxDefaultPosition, wxDefaultSize, wxTR_HAS_BUTTONS | wxSUNKEN_BORDER);
- m_ContentsBox -> SetImageList(m_ContentsImageList);
- m_NavigPan -> AddPage(m_ContentsBox, _("Contents"));
- }
-
- {
- wxWindow *dummy = new wxPanel(m_NavigPan, wxID_HTML_INDEXPAGE);
- wxLayoutConstraints *b1 = new wxLayoutConstraints;
- b1 -> top.SameAs (dummy, wxTop, 0);
- b1 -> left.SameAs (dummy, wxLeft, 0);
- b1 -> width.PercentOf (dummy, wxWidth, 100);
- b1 -> bottom.SameAs (dummy, wxBottom, 0);
- m_IndexBox = new wxListBox(dummy, wxID_HTML_INDEXLIST, wxDefaultPosition, wxDefaultSize, 0);
- m_IndexBox -> SetConstraints(b1);
- dummy -> SetAutoLayout(TRUE);
- m_NavigPan -> AddPage(dummy, _("Index"));
- }
-
- {
- wxWindow *dummy = new wxPanel(m_NavigPan, wxID_HTML_SEARCHPAGE);
-
- wxLayoutConstraints *b1 = new wxLayoutConstraints;
- m_SearchText = new wxTextCtrl(dummy, wxID_HTML_SEARCHTEXT);
- b1 -> top.SameAs (dummy, wxTop, 0);
- b1 -> left.SameAs (dummy, wxLeft, 0);
- b1 -> right.SameAs (dummy, wxRight, 0);
- b1 -> height.AsIs();
- m_SearchText -> SetConstraints(b1);
-
- wxLayoutConstraints *b2 = new wxLayoutConstraints;
- m_SearchButton = new wxButton(dummy, wxID_HTML_SEARCHBUTTON, _("Search!"));
- b2 -> top.Below (m_SearchText, 10);
- b2 -> right.SameAs (dummy, wxRight, 10);
- b2 -> width.AsIs();
- b2 -> height.AsIs();
- m_SearchButton -> SetConstraints(b2);
-
- wxLayoutConstraints *b3 = new wxLayoutConstraints;
- m_SearchList = new wxListBox(dummy, wxID_HTML_SEARCHLIST, wxDefaultPosition, wxDefaultSize, 0);
- b3 -> top.Below (m_SearchButton, 10);
- b3 -> left.SameAs (dummy, wxLeft, 0);
- b3 -> right.SameAs (dummy, wxRight, 0);
- b3 -> bottom.SameAs (dummy, wxBottom, 0);
- m_SearchList -> SetConstraints(b3);
-
- dummy -> SetAutoLayout(TRUE);
- dummy -> Layout();
- m_NavigPan -> AddPage(dummy, _("Search"));
- }
-
- RefreshLists();
- m_NavigPan -> Show(TRUE);
- m_HtmlWin -> Show(TRUE);
- m_Splitter -> SetMinimumPaneSize(20);
- m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos);
- if (!m_Cfg.navig_on) m_Splitter -> Unsplit(m_NavigPan);
- wxYield();
- }
-
- m_Frame -> Show(TRUE);
- wxYield();
-}
-
-
-
-#define MAX_ROOTS 64
-
-void wxHtmlHelpController::CreateContents()
-{
- HtmlContentsItem *it;
- wxTreeItemId roots[MAX_ROOTS];
- bool imaged[MAX_ROOTS];
- int count = m_ContentsCnt;
-
- m_ContentsBox -> DeleteAllItems();
- roots[0] = m_ContentsBox -> AddRoot(_("(Help)"));
- imaged[0] = TRUE;
-
- for (int i = 0; i < count; i++) {
- it = m_Contents + i;
- roots[it -> m_Level + 1] = m_ContentsBox -> AppendItem(roots[it -> m_Level], it -> m_Name, IMG_Page, -1, new wxHtmlHelpTreeItemData(it));
- if (it -> m_Level == 0) {
- m_ContentsBox -> SetItemBold(roots[1], TRUE);
- m_ContentsBox -> SetItemImage(roots[1], IMG_Book);
- m_ContentsBox -> SetItemSelectedImage(roots[1], IMG_Book);
- imaged[1] = TRUE;
- }
- else imaged[it -> m_Level + 1] = FALSE;
-
- if (!imaged[it -> m_Level]) {
- m_ContentsBox -> SetItemImage(roots[it -> m_Level], IMG_Folder);
- m_ContentsBox -> SetItemSelectedImage(roots[it -> m_Level], IMG_Folder);
- imaged[it -> m_Level] = TRUE;
- }
- }
-
- m_ContentsBox -> Expand(roots[0]);
-}
-
-
-
-
-void wxHtmlHelpController::CreateIndex()
-{
- m_IndexBox -> Clear();
-
- for (int i = 0; i < m_IndexCnt; i++)
- m_IndexBox -> Append(m_Index[i].m_Name, (char*)(m_Index + i));
-}
-
-
-
-void wxHtmlHelpController::RefreshLists()
-{
- if (m_Frame) {
- CreateContents();
- CreateIndex();
- m_SearchList -> Clear();
- }
-}
-
-
-
-
-
-
-
-void wxHtmlHelpController::ReadCustomization(wxConfigBase *cfg, wxString path)
-{
- wxString oldpath;
- wxString tmp;
-
- if (path != wxEmptyString) {
- oldpath = cfg -> GetPath();
- cfg -> SetPath(path);
- }
-
- m_Cfg.navig_on = cfg -> Read("hcNavigPanel", m_Cfg.navig_on) != 0;
- m_Cfg.sashpos = cfg -> Read("hcSashPos", m_Cfg.sashpos);
- m_Cfg.x = cfg -> Read("hcX", m_Cfg.x);
- m_Cfg.y = cfg -> Read("hcY", m_Cfg.y);
- m_Cfg.w = cfg -> Read("hcW", m_Cfg.w);
- m_Cfg.h = cfg -> Read("hcH", m_Cfg.h);
-
- if (path != wxEmptyString)
- cfg -> SetPath(oldpath);
-}
-
-
-
-void wxHtmlHelpController::WriteCustomization(wxConfigBase *cfg, wxString path)
-{
- wxString oldpath;
- wxString tmp;
-
- if (path != wxEmptyString) {
- oldpath = cfg -> GetPath();
- cfg -> SetPath(path);
- }
-
- cfg -> Write("hcNavigPanel", m_Cfg.navig_on);
- cfg -> Write("hcSashPos", (long)m_Cfg.sashpos);
- cfg -> Write("hcX", (long)m_Cfg.x);
- cfg -> Write("hcY", (long)m_Cfg.y);
- cfg -> Write("hcW", (long)m_Cfg.w);
- cfg -> Write("hcH", (long)m_Cfg.h);
-
- if (path != wxEmptyString)
- cfg -> SetPath(oldpath);
-}
-
-
-
-
-
-/*
-EVENT HANDLING :
-*/
-
-
-void wxHtmlHelpController::OnToolbar(wxCommandEvent& event)
-{
- switch (event.GetId()) {
- case wxID_HTML_BACK :
- m_HtmlWin -> HistoryBack();
- break;
- case wxID_HTML_FORWARD :
- m_HtmlWin -> HistoryForward();
- break;
- case wxID_HTML_PANEL :
- if (m_Splitter -> IsSplit()) {
- m_Cfg.sashpos = m_Splitter -> GetSashPosition();
- m_Splitter -> Unsplit(m_NavigPan);
- }
- else {
- m_NavigPan -> Show(TRUE);
- m_HtmlWin -> Show(TRUE);
- m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos);
- }
- break;
- }
-}
-
-
-
-void wxHtmlHelpController::OnContentsSel(wxTreeEvent& event)
-{
- wxHtmlHelpTreeItemData *pg;
-
- pg = (wxHtmlHelpTreeItemData*) m_ContentsBox -> GetItemData(event.GetItem());
- if (pg) m_HtmlWin -> LoadPage(pg -> GetPage());
-}
-
-
-
-void wxHtmlHelpController::OnIndexSel(wxCommandEvent& event)
-{
- HtmlContentsItem *it = (HtmlContentsItem*) m_IndexBox -> GetClientData(m_IndexBox -> GetSelection());
- if (it) m_HtmlWin -> LoadPage(it -> m_Book -> GetBasePath() + it -> m_Page);
-}
-
-
-
-void wxHtmlHelpController::OnSearchSel(wxCommandEvent& event)
-{
- HtmlContentsItem *it = (HtmlContentsItem*) m_SearchList -> GetClientData(m_SearchList -> GetSelection());
- if (it) m_HtmlWin -> LoadPage(it -> m_Book -> GetBasePath() + it -> m_Page);
-}
-
-
-
-void wxHtmlHelpController::OnCloseWindow(wxCloseEvent& event)
-{
- int a, b;
-
- m_Cfg.navig_on = m_Splitter -> IsSplit();
- if (m_Cfg.navig_on)
- m_Cfg.sashpos = m_Splitter -> GetSashPosition();
- m_Frame -> GetPosition(&a, &b);
- m_Cfg.x = a, m_Cfg.y = b;
- m_Frame -> GetSize(&a, &b);
- m_Cfg.w = a, m_Cfg.h = b;
-
- if (m_Config) {
- WriteCustomization(m_Config, m_ConfigRoot);
- m_HtmlWin -> WriteCustomization(m_Config, m_ConfigRoot);
- }
- m_Frame = NULL;
-
- event.Skip();
-}
-
-
-
-void wxHtmlHelpController::OnSearch(wxCommandEvent& event)
-{
- wxString sr = m_SearchText -> GetLineText(0);
-
- if (sr != wxEmptyString) KeywordSearch(sr);
-}
-
-
-
-BEGIN_EVENT_TABLE(wxHtmlHelpController, wxEvtHandler)
- EVT_TOOL_RANGE(wxID_HTML_PANEL, wxID_HTML_FORWARD, wxHtmlHelpController::OnToolbar)
- EVT_TREE_SEL_CHANGED(wxID_HTML_TREECTRL, wxHtmlHelpController::OnContentsSel)
- EVT_LISTBOX(wxID_HTML_INDEXLIST, wxHtmlHelpController::OnIndexSel)
- EVT_LISTBOX(wxID_HTML_SEARCHLIST, wxHtmlHelpController::OnSearchSel)
- EVT_CLOSE(wxHtmlHelpController::OnCloseWindow)
- EVT_BUTTON(wxID_HTML_SEARCHBUTTON, wxHtmlHelpController::OnSearch)
- EVT_TEXT_ENTER(wxID_HTML_SEARCHTEXT, wxHtmlHelpController::OnSearch)
-END_EVENT_TABLE()
-
-
-
-#endif
-
diff --git a/src/html/search.cpp b/src/html/search.cpp
deleted file mode 100644
index 85a59a2b2e..0000000000
--- a/src/html/search.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-// Name: search.cpp
-// Purpose: search engine
-// Author: Vaclav Slavik
-// RCS-ID: $Id$
-// Copyright: (c) 1999 Vaclav Slavik
-// Licence: wxWindows Licence
-/////////////////////////////////////////////////////////////////////////////
-
-
-
-#ifdef __GNUG__
-#pragma implementation
-#endif
-
-#include "wx/wxprec.h"
-
-#include "wx/defs.h"
-#if wxUSE_HTML
-
-#ifdef __BORDLANDC__
-#pragma hdrstop
-#endif
-
-#ifndef WXPRECOMP
-#endif
-
-#include "wx/html/helpdata.h"
-
-
-//--------------------------------------------------------------------------------
-// wxSearchEngine
-//--------------------------------------------------------------------------------
-
-void wxSearchEngine::LookFor(const wxString& keyword)
-{
- if (m_Keyword) delete[] m_Keyword;
- m_Keyword = new wxChar[keyword.Length() + 1];
- wxStrcpy(m_Keyword, keyword.c_str());
- for (int i = wxStrlen(m_Keyword) - 1; i >= 0; i--)
- if ((m_Keyword[i] >= wxT('A')) && (m_Keyword[i] <= wxT('Z')))
- m_Keyword[i] += wxT('a') - wxT('A');
-}
-
-
-
-bool wxSearchEngine::Scan(wxInputStream *stream)
-{
- wxASSERT_MSG(m_Keyword != NULL, _("wxSearchEngine::LookFor must be called before scanning!"));
-
- int i, j;
- int lng = stream ->GetSize();
- int wrd = wxStrlen(m_Keyword);
- bool found = FALSE;
- char *buf = new char[lng + 1];
- stream -> Read(buf, lng);
- buf[lng] = 0;
-
- for (i = 0; i < lng; i++)
- if ((buf[i] >= 'A') && (buf[i] <= 'Z')) buf[i] += 'a' - 'A';
-
- for (i = 0; i < lng - wrd; i++) {
- j = 0;
- while ((j < wrd) && (buf[i + j] == m_Keyword[j])) j++;
- if (j == wrd) {found = TRUE; break;}
- }
-
- delete[] buf;
- return found;
-}
-
-#endif
diff --git a/src/png/makefile.nt b/src/png/makefile.nt
deleted file mode 100644
index ab823c8fc9..0000000000
--- a/src/png/makefile.nt
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# File: makefile.nt
-# Author: Julian Smart
-# Created: 1993
-# Updated:
-# Copyright: (c) 1993, AIAI, University of Edinburgh
-#
-# "%W% %G%"
-#
-# Makefile : Builds winpng.lib library for Windows 3.1
-
-# Change WXDIR or WXWIN to wherever wxWindows is found
-WXDIR = $(WXWIN)
-WXLIB = $(WXDIR)\lib\wx.lib
-WXINC = $(WXDIR)\include
-
-WINPNGDIR = ..\png
-WINPNGINC = $(WINPNGDIR)
-WINPNGLIB = ..\..\lib\winpng.lib
-
-INC = /I..\zlib
-
-FINAL=1
-
-# Set this to nothing if your compiler is MS C++ 7
-ZOPTION=
-
-!ifndef FINAL
-FINAL=0
-!endif
-
-PRECOMP=/YuWX.H
-
-!if "$(FINAL)" == "0"
-OPT = /Od
-CPPFLAGS= /W4 /Zi /MD /GX- $(ZOPTION) $(OPT) /Dwx_msw $(INC) # $(PRECOMP) /Fp$(WXDIR)\src\msw\wx.pch
-CFLAGS= /W4 /Zi /MD /GX- /Od /Dwx_msw $(INC)
-LINKFLAGS=/NOD /CO /ONERROR:NOEXE
-!else
-# /Ox for real FINAL version
-OPT = /O2
-CPPFLAGS= /W4 /MD /GX- /Dwx_msw $(INC) # $(PRECOMP) /Fp$(WXDIR)\src\msw\wx.pch
-CFLAGS= /W4 /MD /GX- /Dwx_msw $(INC)
-LINKFLAGS=/NOD /ONERROR:NOEXE
-!endif
-
-OBJECTS = png.obj pngread.obj pngrtran.obj pngrutil.obj \
- pngpread.obj pngtrans.obj pngwrite.obj pngwtran.obj pngwutil.obj \
- pngerror.obj pngmem.obj pngwio.obj pngrio.obj pngget.obj pngset.obj
-
-all: $(WINPNGLIB)
-
-$(WINPNGLIB): $(OBJECTS)
- erase $(WINPNGLIB)
- lib @<<
--out:$(WINPNGLIB)
-$(OBJECTS)
-<<
-
-.c.obj:
- cl -DWIN32 $(OPT) $(CFLAGS) /c $*.c
-
-clean:
- erase *.obj
- erase *.exe
- erase *.lib
-
-cleanall: clean
diff --git a/src/regex/COPYRIGHT b/src/regex/COPYRIGHT
index 65aaadd6cf..30c1f7a488 100644
--- a/src/regex/COPYRIGHT
+++ b/src/regex/COPYRIGHT
@@ -1,166 +1,20 @@
-This regular expression package was originally developed by Henry Spencer.
-It bears the following copyright notice:
+Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
+This software is not subject to any license of the American Telephone
+and Telegraph Company or of the Regents of the University of California.
-**********************************************************************
+Permission is granted to anyone to use this software for any purpose on
+any computer system, and to alter it and redistribute it, subject
+to the following restrictions:
-Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+1. The author is not responsible for the consequences of use of this
+ software, no matter how awful, even if they arise from flaws in it.
-Development of this software was funded, in part, by Cray Research Inc.,
-UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
-Corporation, none of whom are responsible for the results. The author
-thanks all of them.
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission. Since few users ever read sources,
+ credits must appear in the documentation.
-Redistribution and use in source and binary forms -- with or without
-modification -- are permitted for any purpose, provided that
-redistributions in source form retain this entire copyright notice and
-indicate the origin and nature of any modifications.
-
-I'd appreciate being given credit for this package in the documentation
-of software which uses it, but that is not a requirement.
-
-THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
-AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
-HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-**********************************************************************
-
-PostgreSQL adopted the code out of Tcl 8.4.1. Portions of regc_locale.c
-and re_syntax.n were developed by Tcl developers other than Henry; these
-files bear the Tcl copyright and license notice:
-
-**********************************************************************
-
-This software is copyrighted by the Regents of the University of
-California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
-Corporation and other parties. The following terms apply to all files
-associated with the software unless explicitly disclaimed in
-individual files.
-
-The authors hereby grant permission to use, copy, modify, distribute,
-and license this software and its documentation for any purpose, provided
-that existing copyright notices are retained in all copies and that this
-notice is included verbatim in any distributions. No written agreement,
-license, or royalty fee is required for any of the authorized uses.
-Modifications to this software may be copyrighted by their authors
-and need not follow the licensing terms described here, provided that
-the new terms are clearly indicated on the first page of each file where
-they apply.
-
-IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
-FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
-ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
-DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
-IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
-NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
-MODIFICATIONS.
-
-GOVERNMENT USE: If you are acquiring this software on behalf of the
-U.S. government, the Government shall have only "Restricted Rights"
-in the software and related documentation as defined in the Federal
-Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
-are acquiring the software on behalf of the Department of Defense, the
-software shall be classified as "Commercial Computer Software" and the
-Government shall have only "Restricted Rights" as defined in Clause
-252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
-authors grant the U.S. Government and others acting in its behalf
-permission to use and distribute the software in accordance with the
-terms specified in this license.
-
-**********************************************************************
-
-Subsequent modifications to the code by the PostgreSQL project follow
-the same license terms as the rest of PostgreSQL.
-(License follows)
-****************************************************************************
-PostgreSQL Database Management System
-(formerly known as Postgres, then as Postgres95)
-
-Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
-
-Portions Copyright (c) 1994, The Regents of the University of California
-
-Permission to use, copy, modify, and distribute this software and its
-documentation for any purpose, without fee, and without a written agreement
-is hereby granted, provided that the above copyright notice and this
-paragraph and the following two paragraphs appear in all copies.
-
-IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
-DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
-LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
-DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
-AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
-ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
-PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-****************************************************************************
-And if that's not enough, changes made from wxWindows are put under the
-wxWindows license:
-****************************************************************************
- wxWindows Library Licence, Version 3
- ====================================
-
- Copyright (C) 1998 Julian Smart, Robert Roebling [, ...]
-
- Everyone is permitted to copy and distribute verbatim copies
- of this licence document, but changing it is not allowed.
-
- WXWINDOWS LIBRARY LICENCE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- This library is free software; you can redistribute it and/or modify it
- under the terms of the GNU Library General Public Licence as published by
- the Free Software Foundation; either version 2 of the Licence, or (at
- your option) any later version.
-
- This library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library
- General Public Licence for more details.
-
- You should have received a copy of the GNU Library General Public Licence
- along with this software, usually in a file named COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
- Boston, MA 02111-1307 USA.
-
- EXCEPTION NOTICE
-
- 1. As a special exception, the copyright holders of this library give
- permission for additional uses of the text contained in this release of
- the library as licenced under the wxWindows Library Licence, applying
- either version 3 of the Licence, or (at your option) any later version of
- the Licence as published by the copyright holders of version 3 of the
- Licence document.
-
- 2. The exception is that you may use, copy, link, modify and distribute
- under the user's own terms, binary object code versions of works based
- on the Library.
-
- 3. If you copy code from files distributed under the terms of the GNU
- General Public Licence or the GNU Library General Public Licence into a
- copy of this library, as this licence permits, the exception does not
- apply to the code that you add in this way. To avoid misleading anyone as
- to the status of such modified files, you must delete this exception
- notice from such code and/or adjust the licensing conditions notice
- accordingly.
-
- 4. If you write modifications of your own for this library, it is your
- choice whether to permit this exception to apply to your modifications.
- If you do not wish that, you must delete the exception notice from such
- code and/or adjust the licensing conditions notice accordingly.
-****************************************************************************
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software. Since few users
+ ever read sources, credits must appear in the documentation.
+4. This notice may not be removed or altered.
diff --git a/src/regex/Makefile b/src/regex/Makefile
index cb76d37ea7..ce20561fa9 100644
--- a/src/regex/Makefile
+++ b/src/regex/Makefile
@@ -1,28 +1,130 @@
-#-------------------------------------------------------------------------
-#
-# Makefile--
-# Makefile for backend/regex
-#
-# IDENTIFICATION
-# $Header: /projects/cvsroot/pgsql-server/src/backend/regex/Makefile,v 1.20 2003/02/05 17:41:32 tgl Exp $
-#
-#-------------------------------------------------------------------------
+# You probably want to take -DREDEBUG out of CFLAGS, and put something like
+# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
+# internal assertion checking and some debugging facilities).
+# Put -Dconst= in for a pre-ANSI compiler.
+# Do not take -DPOSIX_MISTAKE out.
+# REGCFLAGS isn't important to you (it's for my use in some special contexts).
+CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
-subdir = src/backend/regex
-top_builddir = ../../..
-include $(top_builddir)/src/Makefile.global
+# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want
+# the Berkeley __P macro, put -b in.
+MKHFLAGS=
-OBJS = regcomp.o regerror.o regexec.o regfree.o
+# Flags for linking but not compiling, if any.
+LDFLAGS=
-all: SUBSYS.o
+# Extra libraries for linking, if any.
+LIBS=
-SUBSYS.o: $(OBJS)
- $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
+# Internal stuff, should not need changing.
+OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
+OBJS=$(OBJPRODN) split.o debug.o re_main.o
+H=cclass.h cname.h regex2.h utils.h
+REGSRC=regcomp.c regerror.c regexec.c regfree.c
+ALLSRC=$(REGSRC) engine.c debug.c re_main.c split.c
-# mark inclusion dependencies between .c files explicitly
-regcomp.o: regcomp.c regc_lex.c regc_color.c regc_nfa.c regc_cvec.c regc_locale.c
+# Stuff that matters only if you're trying to lint the package.
+LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
+LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c re_main.c
+JUNKLINT=possible pointer alignment|null effect
-regexec.o: regexec.c rege_dfa.c
+# arrangements to build forward-reference header files
+.SUFFIXES: .ih .h
+.c.ih:
+ sh ./mkh $(MKHFLAGS) -p $< >$@
-clean:
- rm -f SUBSYS.o $(OBJS)
+default: r
+
+lib: purge $(OBJPRODN)
+ rm -f libregex.a
+ ar crv libregex.a $(OBJPRODN)
+
+purge:
+ rm -f *.o
+
+# stuff to build regex.h
+REGEXH=regex.h
+REGEXHSRC=regex2.h $(REGSRC)
+$(REGEXH): $(REGEXHSRC) mkh
+ sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
+ cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
+ rm -f regex.tmp
+
+# dependencies
+$(OBJPRODN) debug.o: utils.h regex.h regex2.h
+regcomp.o: cclass.h cname.h regcomp.ih
+regexec.o: engine.c engine.ih
+regerror.o: regerror.ih
+debug.o: debug.ih
+re_main.o: re_main.ih
+
+# tester
+re: $(OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
+
+# regression test
+r: re tests
+ ./re &1 | egrep -v '$(JUNKLINT)' | tee lint
+
+fullprint:
+ ti README WHATSNEW notes todo | list
+ ti *.h | list
+ list *.c
+ list regex.3 regex.7
+
+print:
+ ti README WHATSNEW notes todo | list
+ ti *.h | list
+ list reg*.c engine.c
+
+
+mf.tmp: Makefile
+ sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
+
+DTRH=cclass.h cname.h regex2.h utils.h
+PRE=COPYRIGHT README WHATSNEW
+POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
+FILES=$(PRE) Makefile $(POST)
+DTR=$(PRE) Makefile=mf.tmp $(POST)
+dtr: $(FILES) mf.tmp
+ makedtr $(DTR) >$@
+ rm mf.tmp
+
+cio: $(FILES)
+ cio $(FILES)
+
+rdf: $(FILES)
+ rcsdiff -c $(FILES) 2>&1 | p
+
+# various forms of cleanup
+tidy:
+ rm -f junk* core core.* *.core dtr *.tmp lint
+
+clean: tidy
+ rm -f *.o *.s *.ih re libregex.a
+
+# don't do this one unless you know what you're doing
+spotless: clean
+ rm -f mkh regex.h
diff --git a/src/regex/cclass.h b/src/regex/cclass.h
new file mode 100644
index 0000000000..2b50a76197
--- /dev/null
+++ b/src/regex/cclass.h
@@ -0,0 +1,20 @@
+/* character-class table */
+static struct cclass {
+ char *name;
+ char *chars;
+ char *multis;
+} cclasses[] = {
+ { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "" },
+ { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "" },
+ { "blank", " \t", "" },
+ { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", "" },
+ { "digit", "0123456789", "" },
+ { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
+ { "lower", "abcdefghijklmnopqrstuvwxyz", "" },
+ { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", "" },
+ { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
+ { "space", "\t\n\v\f\r ", "" },
+ { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "" },
+ { "xdigit", "0123456789ABCDEFabcdef", "" },
+ { NULL, 0, "" }
+};
diff --git a/src/regex/cname.h b/src/regex/cname.h
new file mode 100644
index 0000000000..c1a6dd5656
--- /dev/null
+++ b/src/regex/cname.h
@@ -0,0 +1,102 @@
+/* character-name table */
+static struct cname {
+ char *name;
+ char code;
+} cnames[] = {
+ { "NUL", '\0' },
+ { "SOH", '\001' },
+ { "STX", '\002' },
+ { "ETX", '\003' },
+ { "EOT", '\004' },
+ { "ENQ", '\005' },
+ { "ACK", '\006' },
+ { "BEL", '\007' },
+ { "alert", '\007' },
+ { "BS", '\010' },
+ { "backspace", '\b' },
+ { "HT", '\011' },
+ { "tab", '\t' },
+ { "LF", '\012' },
+ { "newline", '\n' },
+ { "VT", '\013' },
+ { "vertical-tab", '\v' },
+ { "FF", '\014' },
+ { "form-feed", '\f' },
+ { "CR", '\015' },
+ { "carriage-return", '\r' },
+ { "SO", '\016' },
+ { "SI", '\017' },
+ { "DLE", '\020' },
+ { "DC1", '\021' },
+ { "DC2", '\022' },
+ { "DC3", '\023' },
+ { "DC4", '\024' },
+ { "NAK", '\025' },
+ { "SYN", '\026' },
+ { "ETB", '\027' },
+ { "CAN", '\030' },
+ { "EM", '\031' },
+ { "SUB", '\032' },
+ { "ESC", '\033' },
+ { "IS4", '\034' },
+ { "FS", '\034' },
+ { "IS3", '\035' },
+ { "GS", '\035' },
+ { "IS2", '\036' },
+ { "RS", '\036' },
+ { "IS1", '\037' },
+ { "US", '\037' },
+ { "space", ' ' },
+ { "exclamation-mark", '!' },
+ { "quotation-mark", '"' },
+ { "number-sign", '#' },
+ { "dollar-sign", '$' },
+ { "percent-sign", '%' },
+ { "ampersand", '&' },
+ { "apostrophe", '\'' },
+ { "left-parenthesis", '(' },
+ { "right-parenthesis", ')' },
+ { "asterisk", '*' },
+ { "plus-sign", '+' },
+ { "comma", ',' },
+ { "hyphen", '-' },
+ { "hyphen-minus", '-' },
+ { "period", '.' },
+ { "full-stop", '.' },
+ { "slash", '/' },
+ { "solidus", '/' },
+ { "zero", '0' },
+ { "one", '1' },
+ { "two", '2' },
+ { "three", '3' },
+ { "four", '4' },
+ { "five", '5' },
+ { "six", '6' },
+ { "seven", '7' },
+ { "eight", '8' },
+ { "nine", '9' },
+ { "colon", ':' },
+ { "semicolon", ';' },
+ { "less-than-sign", '<' },
+ { "equals-sign", '=' },
+ { "greater-than-sign", '>' },
+ { "question-mark", '?' },
+ { "commercial-at", '@' },
+ { "left-square-bracket", '[' },
+ { "backslash", '\\' },
+ { "reverse-solidus", '\\' },
+ { "right-square-bracket", ']' },
+ { "circumflex", '^' },
+ { "circumflex-accent", '^' },
+ { "underscore", '_' },
+ { "low-line", '_' },
+ { "grave-accent", '`' },
+ { "left-brace", '{' },
+ { "left-curly-bracket", '{' },
+ { "vertical-line", '|' },
+ { "right-brace", '}' },
+ { "right-curly-bracket", '}' },
+ { "tilde", '~' },
+ { "DEL", '\177' },
+ { NULL, 0 },
+};
diff --git a/src/regex/debug.c b/src/regex/debug.c
new file mode 100644
index 0000000000..bf40bbb3bd
--- /dev/null
+++ b/src/regex/debug.c
@@ -0,0 +1,242 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include "regex.h"
+
+#include "utils.h"
+#include "regex2.h"
+#include "debug.ih"
+
+/*
+ - regprint - print a regexp for debugging
+ == void regprint(regex_t *r, FILE *d);
+ */
+void
+regprint(r, d)
+regex_t *r;
+FILE *d;
+{
+ register struct re_guts *g = r->re_g;
+ register int i;
+ register int c;
+ register int last;
+ int nincat[NC];
+
+ fprintf(d, "%ld states, %d categories", (long)g->nstates,
+ g->ncategories);
+ fprintf(d, ", first %ld last %ld", (long)g->firststate,
+ (long)g->laststate);
+ if (g->iflags&USEBOL)
+ fprintf(d, ", USEBOL");
+ if (g->iflags&USEEOL)
+ fprintf(d, ", USEEOL");
+ if (g->iflags&BAD)
+ fprintf(d, ", BAD");
+ if (g->nsub > 0)
+ fprintf(d, ", nsub=%ld", (long)g->nsub);
+ if (g->must != NULL)
+ fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
+ g->must);
+ if (g->backrefs)
+ fprintf(d, ", backrefs");
+ if (g->nplus > 0)
+ fprintf(d, ", nplus %ld", (long)g->nplus);
+ fprintf(d, "\n");
+ s_print(g, d);
+ for (i = 0; i < g->ncategories; i++) {
+ nincat[i] = 0;
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (g->categories[c] == i)
+ nincat[i]++;
+ }
+ fprintf(d, "cc0#%d", nincat[0]);
+ for (i = 1; i < g->ncategories; i++)
+ if (nincat[i] == 1) {
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (g->categories[c] == i)
+ break;
+ fprintf(d, ", %d=%s", i, regchar(c));
+ }
+ fprintf(d, "\n");
+ for (i = 1; i < g->ncategories; i++)
+ if (nincat[i] != 1) {
+ fprintf(d, "cc%d\t", i);
+ last = -1;
+ for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */
+ if (c <= CHAR_MAX && g->categories[c] == i) {
+ if (last < 0) {
+ fprintf(d, "%s", regchar(c));
+ last = c;
+ }
+ } else {
+ if (last >= 0) {
+ if (last != c-1)
+ fprintf(d, "-%s",
+ regchar(c-1));
+ last = -1;
+ }
+ }
+ fprintf(d, "\n");
+ }
+}
+
+/*
+ - s_print - print the strip for debugging
+ == static void s_print(register struct re_guts *g, FILE *d);
+ */
+static void
+s_print(g, d)
+register struct re_guts *g;
+FILE *d;
+{
+ register sop *s;
+ register cset *cs;
+ register int i;
+ register int done = 0;
+ register sop opnd;
+ register int col = 0;
+ register int last;
+ register sopno offset = 2;
+# define GAP() { if (offset % 5 == 0) { \
+ if (col > 40) { \
+ fprintf(d, "\n\t"); \
+ col = 0; \
+ } else { \
+ fprintf(d, " "); \
+ col++; \
+ } \
+ } else \
+ col++; \
+ offset++; \
+ }
+
+ if (OP(g->strip[0]) != OEND)
+ fprintf(d, "missing initial OEND!\n");
+ for (s = &g->strip[1]; !done; s++) {
+ opnd = OPND(*s);
+ switch (OP(*s)) {
+ case OEND:
+ fprintf(d, "\n");
+ done = 1;
+ break;
+ case OCHAR:
+ if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
+ fprintf(d, "\\%c", (char)opnd);
+ else
+ fprintf(d, "%s", regchar((char)opnd));
+ break;
+ case OBOL:
+ fprintf(d, "^");
+ break;
+ case OEOL:
+ fprintf(d, "$");
+ break;
+ case OBOW:
+ fprintf(d, "\\{");
+ break;
+ case OEOW:
+ fprintf(d, "\\}");
+ break;
+ case OANY:
+ fprintf(d, ".");
+ break;
+ case OANYOF:
+ fprintf(d, "[(%ld)", (long)opnd);
+ cs = &g->sets[opnd];
+ last = -1;
+ for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
+ if (CHIN(cs, i) && i < g->csetsize) {
+ if (last < 0) {
+ fprintf(d, "%s", regchar(i));
+ last = i;
+ }
+ } else {
+ if (last >= 0) {
+ if (last != i-1)
+ fprintf(d, "-%s",
+ regchar(i-1));
+ last = -1;
+ }
+ }
+ fprintf(d, "]");
+ break;
+ case OBACK_:
+ fprintf(d, "(\\<%ld>", (long)opnd);
+ break;
+ case O_BACK:
+ fprintf(d, "<%ld>\\)", (long)opnd);
+ break;
+ case OPLUS_:
+ fprintf(d, "(+");
+ if (OP(*(s+opnd)) != O_PLUS)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_PLUS:
+ if (OP(*(s-opnd)) != OPLUS_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "+)");
+ break;
+ case OQUEST_:
+ fprintf(d, "(?");
+ if (OP(*(s+opnd)) != O_QUEST)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_QUEST:
+ if (OP(*(s-opnd)) != OQUEST_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "?)");
+ break;
+ case OLPAREN:
+ fprintf(d, "((<%ld>", (long)opnd);
+ break;
+ case ORPAREN:
+ fprintf(d, "<%ld>))", (long)opnd);
+ break;
+ case OCH_:
+ fprintf(d, "<");
+ if (OP(*(s+opnd)) != OOR2)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case OOR1:
+ if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "|");
+ break;
+ case OOR2:
+ fprintf(d, "|");
+ if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_CH:
+ if (OP(*(s-opnd)) != OOR1)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, ">");
+ break;
+ default:
+ fprintf(d, "!%d(%d)!", OP(*s), opnd);
+ break;
+ }
+ if (!done)
+ GAP();
+ }
+}
+
+/*
+ - regchar - make a character printable
+ == static char *regchar(int ch);
+ */
+static char * /* -> representation */
+regchar(ch)
+int ch;
+{
+ static char buf[10];
+
+ if (isprint(ch) || ch == ' ')
+ sprintf(buf, "%c", ch);
+ else
+ sprintf(buf, "\\%o", ch);
+ return(buf);
+}
diff --git a/src/regex/engine.c b/src/regex/engine.c
new file mode 100644
index 0000000000..0b88dcf1ed
--- /dev/null
+++ b/src/regex/engine.c
@@ -0,0 +1,1019 @@
+/*
+ * The matching engine and friends. This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+ struct re_guts *g;
+ int eflags;
+ regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
+ char *offp; /* offsets work from here */
+ char *beginp; /* start of string -- virtual NUL precedes */
+ char *endp; /* end of string -- virtual NUL here */
+ char *coldp; /* can be no match starting before here */
+ char **lastpos; /* [nplus+1] */
+ STATEVARS;
+ states st; /* current states */
+ states fresh; /* states for a fresh start */
+ states tmp; /* temporary */
+ states empty; /* empty set of states */
+};
+
+#include "engine.ih"
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); }
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ == static int matcher(register struct re_guts *g, char *string, \
+ == size_t nmatch, regmatch_t pmatch[], int eflags);
+ */
+static int /* 0 success, REG_NOMATCH failure */
+matcher(g, string, nmatch, pmatch, eflags)
+register struct re_guts *g;
+char *string;
+size_t nmatch;
+regmatch_t pmatch[];
+int eflags;
+{
+ register char *endp;
+ register size_t i;
+ struct match mv;
+ register struct match *m = &mv;
+ register char *dp;
+ const register sopno gf = g->firststate+1; /* +1 for OEND */
+ const register sopno gl = g->laststate;
+ char *start;
+ char *stop;
+
+ /* simplify the situation where possible */
+ if (g->cflags®_NOSUB)
+ nmatch = 0;
+ if (eflags®_STARTEND) {
+ start = string + pmatch[0].rm_so;
+ stop = string + pmatch[0].rm_eo;
+ } else {
+ start = string;
+ stop = start + strlen(start);
+ }
+ if (stop < start)
+ return(REG_INVARG);
+
+ /* prescreening; this does wonders for this rather slow code */
+ if (g->must != NULL) {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] && stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
+
+ /* match struct setup */
+ m->g = g;
+ m->eflags = eflags;
+ m->pmatch = NULL;
+ m->lastpos = NULL;
+ m->offp = string;
+ m->beginp = start;
+ m->endp = stop;
+ STATESETUP(m, 4);
+ SETUP(m->st);
+ SETUP(m->fresh);
+ SETUP(m->tmp);
+ SETUP(m->empty);
+ CLEAR(m->empty);
+
+ /* this loop does only one repetition except for backrefs */
+ for (;;) {
+ endp = fast(m, start, stop, gf, gl);
+ if (endp == NULL) { /* a miss */
+ STATETEARDOWN(m);
+ return(REG_NOMATCH);
+ }
+ if (nmatch == 0 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* where? */
+ assert(m->coldp != NULL);
+ for (;;) {
+ NOTE("finding start");
+ endp = slow(m, m->coldp, stop, gf, gl);
+ if (endp != NULL)
+ break;
+ assert(m->coldp < m->endp);
+ m->coldp++;
+ }
+ if (nmatch == 1 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* oh my, he wants the subexpressions... */
+ if (m->pmatch == NULL)
+ m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
+ sizeof(regmatch_t));
+ if (m->pmatch == NULL) {
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ for (i = 1; i <= m->g->nsub; i++)
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+ if (!g->backrefs && !(m->eflags®_BACKR)) {
+ NOTE("dissecting");
+ dp = dissect(m, m->coldp, endp, gf, gl);
+ } else {
+ if (g->nplus > 0 && m->lastpos == NULL)
+ m->lastpos = (char **)malloc((g->nplus+1) *
+ sizeof(char *));
+ if (g->nplus > 0 && m->lastpos == NULL) {
+ free(m->pmatch);
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ NOTE("backref dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ }
+ if (dp != NULL)
+ break;
+
+ /* uh-oh... we couldn't find a subexpression-level match */
+ assert(g->backrefs); /* must be back references doing it */
+ assert(g->nplus == 0 || m->lastpos != NULL);
+ for (;;) {
+ if (dp != NULL || endp <= m->coldp)
+ break; /* defeat */
+ NOTE("backoff");
+ endp = slow(m, m->coldp, endp-1, gf, gl);
+ if (endp == NULL)
+ break; /* defeat */
+ /* try it on a shorter possibility */
+#ifndef NDEBUG
+ for (i = 1; i <= m->g->nsub; i++) {
+ assert(m->pmatch[i].rm_so == -1);
+ assert(m->pmatch[i].rm_eo == -1);
+ }
+#endif
+ NOTE("backoff dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ }
+ assert(dp == NULL || dp == endp);
+ if (dp != NULL) /* found a shorter one */
+ break;
+
+ /* despite initial appearances, there is no match here */
+ NOTE("false alarm");
+ start = m->coldp + 1; /* recycle starting later */
+ assert(start <= stop);
+ }
+
+ /* fill in the details if requested */
+ if (nmatch > 0) {
+ pmatch[0].rm_so = m->coldp - m->offp;
+ pmatch[0].rm_eo = endp - m->offp;
+ }
+ if (nmatch > 1) {
+ assert(m->pmatch != NULL);
+ for (i = 1; i < nmatch; i++)
+ if (i <= m->g->nsub)
+ pmatch[i] = m->pmatch[i];
+ else {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+ }
+
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
+ STATETEARDOWN(m);
+ return(0);
+}
+
+/*
+ - dissect - figure out what matched what, no back references
+ == static char *dissect(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst);
+ */
+static char * /* == stop (success) always */
+dissect(m, start, stop, startst, stopst)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
+{
+ register int i;
+ register sopno ss; /* start sop of current subRE */
+ register sopno es; /* end sop of current subRE */
+ register char *sp; /* start of string matched by it */
+ register char *stp; /* string matched by it cannot pass here */
+ register char *rest; /* start of rest of string */
+ register char *tail; /* string unmatched by rest of RE */
+ register sopno ssub; /* start sop of subsubRE */
+ register sopno esub; /* end sop of subsubRE */
+ register char *ssp; /* start of string matched by subsubRE */
+ register char *sep; /* end of string matched by subsubRE */
+ register char *oldssp; /* previous ssp */
+ register char *dp;
+
+ AT("diss", start, stop, startst, stopst);
+ sp = start;
+ for (ss = startst; ss < stopst; ss = es) {
+ /* identify end of subRE */
+ es = ss;
+ switch (OP(m->g->strip[es])) {
+ case OPLUS_:
+ case OQUEST_:
+ es += OPND(m->g->strip[es]);
+ break;
+ case OCH_:
+ while (OP(m->g->strip[es]) != O_CH)
+ es += OPND(m->g->strip[es]);
+ break;
+ }
+ es++;
+
+ /* figure out what it matched */
+ switch (OP(m->g->strip[ss])) {
+ case OEND:
+ assert(nope);
+ break;
+ case OCHAR:
+ sp++;
+ break;
+ case OBOL:
+ case OEOL:
+ case OBOW:
+ case OEOW:
+ break;
+ case OANY:
+ case OANYOF:
+ sp++;
+ break;
+ case OBACK_:
+ case O_BACK:
+ assert(nope);
+ break;
+ /* cases where length of match is hard to find */
+ case OQUEST_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ /* did innards match? */
+ if (slow(m, sp, rest, ssub, esub) != NULL) {
+ dp = dissect(m, sp, rest, ssub, esub);
+ assert(dp == rest);
+ } else /* no */
+ assert(sp == rest);
+ sp = rest;
+ break;
+ case OPLUS_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = es - 1;
+ ssp = sp;
+ oldssp = ssp;
+ for (;;) { /* find last match of innards */
+ sep = slow(m, ssp, rest, ssub, esub);
+ if (sep == NULL || sep == ssp)
+ break; /* failed or matched null */
+ oldssp = ssp; /* on to next try */
+ ssp = sep;
+ }
+ if (sep == NULL) {
+ /* last successful match */
+ sep = ssp;
+ ssp = oldssp;
+ }
+ assert(sep == rest); /* must exhaust substring */
+ assert(slow(m, ssp, sep, ssub, esub) == rest);
+ dp = dissect(m, ssp, sep, ssub, esub);
+ assert(dp == sep);
+ sp = rest;
+ break;
+ case OCH_:
+ stp = stop;
+ for (;;) {
+ /* how long could this one be? */
+ rest = slow(m, sp, stp, ss, es);
+ assert(rest != NULL); /* it did match */
+ /* could the rest match the rest? */
+ tail = slow(m, rest, stop, es, stopst);
+ if (tail == stop)
+ break; /* yes! */
+ /* no -- try a shorter match for this one */
+ stp = rest - 1;
+ assert(stp >= sp); /* it did work */
+ }
+ ssub = ss + 1;
+ esub = ss + OPND(m->g->strip[ss]) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ if (slow(m, sp, rest, ssub, esub) == rest)
+ break; /* it matched all of it */
+ /* that one missed, try next one */
+ assert(OP(m->g->strip[esub]) == OOR1);
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ dp = dissect(m, sp, rest, ssub, esub);
+ assert(dp == rest);
+ sp = rest;
+ break;
+ case O_PLUS:
+ case O_QUEST:
+ case OOR1:
+ case OOR2:
+ case O_CH:
+ assert(nope);
+ break;
+ case OLPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_so = sp - m->offp;
+ break;
+ case ORPAREN:
+ i = OPND(m->g->strip[ss]);
+ assert(0 < i && i <= m->g->nsub);
+ m->pmatch[i].rm_eo = sp - m->offp;
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+ }
+
+ assert(sp == stop);
+ return(sp);
+}
+
+/*
+ - backref - figure out what matched what, figuring in back references
+ == static char *backref(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst, sopno lev);
+ */
+static char * /* == stop (success) or NULL (failure) */
+backref(m, start, stop, startst, stopst, lev)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
+sopno lev; /* PLUS nesting level */
+{
+ register int i;
+ register sopno ss; /* start sop of current subRE */
+ register char *sp; /* start of string matched by it */
+ register sopno ssub; /* start sop of subsubRE */
+ register sopno esub; /* end sop of subsubRE */
+ register char *ssp; /* start of string matched by subsubRE */
+ register char *dp;
+ register size_t len;
+ register int hard;
+ register sop s;
+ register regoff_t offsave;
+ register cset *cs;
+
+ AT("back", start, stop, startst, stopst);
+ sp = start;
+
+ /* get as far as we can with easy stuff */
+ hard = 0;
+ for (ss = startst; !hard && ss < stopst; ss++)
+ switch (OP(s = m->g->strip[ss])) {
+ case OCHAR:
+ if (sp == stop || *sp++ != (char)OPND(s))
+ return(NULL);
+ break;
+ case OANY:
+ if (sp == stop)
+ return(NULL);
+ sp++;
+ break;
+ case OANYOF:
+ cs = &m->g->sets[OPND(s)];
+ if (sp == stop || !CHIN(cs, *sp++))
+ return(NULL);
+ break;
+ case OBOL:
+ if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags®_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOL:
+ if ( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags®_NEWLINE)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OBOW:
+ if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
+ (sp < m->endp && *(sp-1) == '\n' &&
+ (m->g->cflags®_NEWLINE)) ||
+ (sp > m->beginp &&
+ !ISWORD(*(sp-1))) ) &&
+ (sp < m->endp && ISWORD(*sp)) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case OEOW:
+ if (( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
+ (sp < m->endp && *sp == '\n' &&
+ (m->g->cflags®_NEWLINE)) ||
+ (sp < m->endp && !ISWORD(*sp)) ) &&
+ (sp > m->beginp && ISWORD(*(sp-1))) )
+ { /* yes */ }
+ else
+ return(NULL);
+ break;
+ case O_QUEST:
+ break;
+ case OOR1: /* matches null but needs to skip */
+ ss++;
+ s = m->g->strip[ss];
+ do {
+ assert(OP(s) == OOR2);
+ ss += OPND(s);
+ } while (OP(s = m->g->strip[ss]) != O_CH);
+ /* note that the ss++ gets us past the O_CH */
+ break;
+ default: /* have to make a choice */
+ hard = 1;
+ break;
+ }
+ if (!hard) { /* that was it! */
+ if (sp != stop)
+ return(NULL);
+ return(sp);
+ }
+ ss--; /* adjust for the for's final increment */
+
+ /* the hard stuff */
+ AT("hard", sp, stop, ss, stopst);
+ s = m->g->strip[ss];
+ switch (OP(s)) {
+ case OBACK_: /* the vilest depths */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ if (m->pmatch[i].rm_eo == -1)
+ return(NULL);
+ assert(m->pmatch[i].rm_so != -1);
+ len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+ assert(stop - m->beginp >= len);
+ if (sp > stop - len)
+ return(NULL); /* not enough left to match */
+ ssp = m->offp + m->pmatch[i].rm_so;
+ if (memcmp(sp, ssp, len) != 0)
+ return(NULL);
+ while (m->g->strip[ss] != SOP(O_BACK, i))
+ ss++;
+ return(backref(m, sp+len, stop, ss+1, stopst, lev));
+ break;
+ case OQUEST_: /* to null or not */
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
+ if (dp != NULL)
+ return(dp); /* not */
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
+ break;
+ case OPLUS_:
+ assert(m->lastpos != NULL);
+ assert(lev+1 <= m->g->nplus);
+ m->lastpos[lev+1] = sp;
+ return(backref(m, sp, stop, ss+1, stopst, lev+1));
+ break;
+ case O_PLUS:
+ if (sp == m->lastpos[lev]) /* last pass matched null */
+ return(backref(m, sp, stop, ss+1, stopst, lev-1));
+ /* try another pass */
+ m->lastpos[lev] = sp;
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
+ if (dp == NULL)
+ return(backref(m, sp, stop, ss+1, stopst, lev-1));
+ else
+ return(dp);
+ break;
+ case OCH_: /* find the right one, if any */
+ ssub = ss + 1;
+ esub = ss + OPND(s) - 1;
+ assert(OP(m->g->strip[esub]) == OOR1);
+ for (;;) { /* find first matching branch */
+ dp = backref(m, sp, stop, ssub, esub, lev);
+ if (dp != NULL)
+ return(dp);
+ /* that one missed, try next one */
+ if (OP(m->g->strip[esub]) == O_CH)
+ return(NULL); /* there is none */
+ esub++;
+ assert(OP(m->g->strip[esub]) == OOR2);
+ ssub = esub + 1;
+ esub += OPND(m->g->strip[esub]);
+ if (OP(m->g->strip[esub]) == OOR2)
+ esub--;
+ else
+ assert(OP(m->g->strip[esub]) == O_CH);
+ }
+ break;
+ case OLPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_so;
+ m->pmatch[i].rm_so = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_so = offsave;
+ return(NULL);
+ break;
+ case ORPAREN: /* must undo assignment if rest fails */
+ i = OPND(s);
+ assert(0 < i && i <= m->g->nsub);
+ offsave = m->pmatch[i].rm_eo;
+ m->pmatch[i].rm_eo = sp - m->offp;
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
+ if (dp != NULL)
+ return(dp);
+ m->pmatch[i].rm_eo = offsave;
+ return(NULL);
+ break;
+ default: /* uh oh */
+ assert(nope);
+ break;
+ }
+
+ /* "can't happen" */
+ assert(nope);
+ /* NOTREACHED */
+ return((char *)NULL); /* dummy */
+}
+
+/*
+ - fast - step through the string at top speed
+ == static char *fast(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst);
+ */
+static char * /* where tentative match ended, or NULL */
+fast(m, start, stop, startst, stopst)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
+{
+ register states st = m->st;
+ register states fresh = m->fresh;
+ register states tmp = m->tmp;
+ register char *p = start;
+ register int c = (start == m->beginp) ? OUT : *(start-1);
+ register int lastc; /* previous c */
+ register int flagch;
+ register int i;
+ register char *coldp; /* last p after which no match was underway */
+
+ CLEAR(st);
+ SET1(st, startst);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ ASSIGN(fresh, st);
+ SP("start", st, *p);
+ coldp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+ if (EQ(st, fresh))
+ coldp = p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
+ (lastc == OUT && !(m->eflags®_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
+ (c == OUT && !(m->eflags®_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("boweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, fresh);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("aft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ assert(coldp != NULL);
+ m->coldp = coldp;
+ if (ISSET(st, stopst))
+ return(p+1);
+ else
+ return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ == static char *slow(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst);
+ */
+static char * /* where it ended */
+slow(m, start, stop, startst, stopst)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
+{
+ register states st = m->st;
+ register states empty = m->empty;
+ register states tmp = m->tmp;
+ register char *p = start;
+ register int c = (start == m->beginp) ? OUT : *(start-1);
+ register int lastc; /* previous c */
+ register int flagch;
+ register int i;
+ register char *matchp; /* last p at which a match ended */
+
+ AT("slow", start, stop, startst, stopst);
+ CLEAR(st);
+ SET1(st, startst);
+ SP("sstart", st, *p);
+ st = step(m->g, startst, stopst, st, NOTHING, st);
+ matchp = NULL;
+ for (;;) {
+ /* next character */
+ lastc = c;
+ c = (p == m->endp) ? OUT : *p;
+
+ /* is there an EOL and/or BOL between lastc and c? */
+ flagch = '\0';
+ i = 0;
+ if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
+ (lastc == OUT && !(m->eflags®_NOTBOL)) ) {
+ flagch = BOL;
+ i = m->g->nbol;
+ }
+ if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
+ (c == OUT && !(m->eflags®_NOTEOL)) ) {
+ flagch = (flagch == BOL) ? BOLEOL : EOL;
+ i += m->g->neol;
+ }
+ if (i != 0) {
+ for (; i > 0; i--)
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboleol", st, c);
+ }
+
+ /* how about a word boundary? */
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+ (c != OUT && ISWORD(c)) ) {
+ flagch = BOW;
+ }
+ if ( (lastc != OUT && ISWORD(lastc)) &&
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+ flagch = EOW;
+ }
+ if (flagch == BOW || flagch == EOW) {
+ st = step(m->g, startst, stopst, st, flagch, st);
+ SP("sboweow", st, c);
+ }
+
+ /* are we done? */
+ if (ISSET(st, stopst))
+ matchp = p;
+ if (EQ(st, empty) || p == stop)
+ break; /* NOTE BREAK OUT */
+
+ /* no, we must deal with this character */
+ ASSIGN(tmp, st);
+ ASSIGN(st, empty);
+ assert(c != OUT);
+ st = step(m->g, startst, stopst, tmp, c, st);
+ SP("saft", st, c);
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+ p++;
+ }
+
+ return(matchp);
+}
+
+
+/*
+ - step - map set of states reachable before char to set reachable after
+ == static states step(register struct re_guts *g, sopno start, sopno stop, \
+ == register states bef, int ch, register states aft);
+ == #define BOL (OUT+1)
+ == #define EOL (BOL+1)
+ == #define BOLEOL (BOL+2)
+ == #define NOTHING (BOL+3)
+ == #define BOW (BOL+4)
+ == #define EOW (BOL+5)
+ == #define CODEMAX (BOL+5) // highest code used
+ == #define NONCHAR(c) ((c) > CHAR_MAX)
+ == #define NNONCHAR (CODEMAX-CHAR_MAX)
+ */
+static states
+step(g, start, stop, bef, ch, aft)
+register struct re_guts *g;
+sopno start; /* start state within strip */
+sopno stop; /* state after stop state within strip */
+register states bef; /* states reachable before */
+int ch; /* character or NONCHAR code */
+register states aft; /* states already known reachable after */
+{
+ register cset *cs;
+ register sop s;
+ register sopno pc;
+ register onestate here; /* note, macros know this name */
+ register sopno look;
+ register long i;
+
+ for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
+ s = g->strip[pc];
+ switch (OP(s)) {
+ case OEND:
+ assert(pc == stop-1);
+ break;
+ case OCHAR:
+ /* only characters can match */
+ assert(!NONCHAR(ch) || ch != (char)OPND(s));
+ if (ch == (char)OPND(s))
+ FWD(aft, bef, 1);
+ break;
+ case OBOL:
+ if (ch == BOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OEOL:
+ if (ch == EOL || ch == BOLEOL)
+ FWD(aft, bef, 1);
+ break;
+ case OBOW:
+ if (ch == BOW)
+ FWD(aft, bef, 1);
+ break;
+ case OEOW:
+ if (ch == EOW)
+ FWD(aft, bef, 1);
+ break;
+ case OANY:
+ if (!NONCHAR(ch))
+ FWD(aft, bef, 1);
+ break;
+ case OANYOF:
+ cs = &g->sets[OPND(s)];
+ if (!NONCHAR(ch) && CHIN(cs, ch))
+ FWD(aft, bef, 1);
+ break;
+ case OBACK_: /* ignored here */
+ case O_BACK:
+ FWD(aft, aft, 1);
+ break;
+ case OPLUS_: /* forward, this is just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case O_PLUS: /* both forward and back */
+ FWD(aft, aft, 1);
+ i = ISSETBACK(aft, OPND(s));
+ BACK(aft, aft, OPND(s));
+ if (!i && ISSETBACK(aft, OPND(s))) {
+ /* oho, must reconsider loop body */
+ pc -= OPND(s) + 1;
+ INIT(here, pc);
+ }
+ break;
+ case OQUEST_: /* two branches, both forward */
+ FWD(aft, aft, 1);
+ FWD(aft, aft, OPND(s));
+ break;
+ case O_QUEST: /* just an empty */
+ FWD(aft, aft, 1);
+ break;
+ case OLPAREN: /* not significant here */
+ case ORPAREN:
+ FWD(aft, aft, 1);
+ break;
+ case OCH_: /* mark the first two branches */
+ FWD(aft, aft, 1);
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ break;
+ case OOR1: /* done a branch, find the O_CH */
+ if (ISSTATEIN(aft, here)) {
+ for (look = 1;
+ OP(s = g->strip[pc+look]) != O_CH;
+ look += OPND(s))
+ assert(OP(s) == OOR2);
+ FWD(aft, aft, look);
+ }
+ break;
+ case OOR2: /* propagate OCH_'s marking */
+ FWD(aft, aft, 1);
+ if (OP(g->strip[pc+OPND(s)]) != O_CH) {
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+ FWD(aft, aft, OPND(s));
+ }
+ break;
+ case O_CH: /* just empty */
+ FWD(aft, aft, 1);
+ break;
+ default: /* ooooops... */
+ assert(nope);
+ break;
+ }
+ }
+
+ return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ == #ifdef REDEBUG
+ == static void print(struct match *m, char *caption, states st, \
+ == int ch, FILE *d);
+ == #endif
+ */
+static void
+print(m, caption, st, ch, d)
+struct match *m;
+char *caption;
+states st;
+int ch;
+FILE *d;
+{
+ register struct re_guts *g = m->g;
+ register int i;
+ register int first = 1;
+
+ if (!(m->eflags®_TRACE))
+ return;
+
+ fprintf(d, "%s", caption);
+ if (ch != '\0')
+ fprintf(d, " %s", pchar(ch));
+ for (i = 0; i < g->nstates; i++)
+ if (ISSET(st, i)) {
+ fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+ first = 0;
+ }
+ fprintf(d, "\n");
+}
+
+/*
+ - at - print current situation
+ == #ifdef REDEBUG
+ == static void at(struct match *m, char *title, char *start, char *stop, \
+ == sopno startst, sopno stopst);
+ == #endif
+ */
+static void
+at(m, title, start, stop, startst, stopst)
+struct match *m;
+char *title;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
+{
+ if (!(m->eflags®_TRACE))
+ return;
+
+ printf("%s %s-", title, pchar(*start));
+ printf("%s ", pchar(*stop));
+ printf("%ld-%ld\n", (long)startst, (long)stopst);
+}
+
+#ifndef PCHARDONE
+#define PCHARDONE /* never again */
+/*
+ - pchar - make a character printable
+ == #ifdef REDEBUG
+ == static char *pchar(int ch);
+ == #endif
+ *
+ * Is this identical to regchar() over in debug.c? Well, yes. But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient. It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ */
+static char * /* -> representation */
+pchar(ch)
+int ch;
+{
+ static char pbuf[10];
+
+ if (isprint(ch) || ch == ' ')
+ sprintf(pbuf, "%c", ch);
+ else
+ sprintf(pbuf, "\\%o", ch);
+ return(pbuf);
+}
+#endif
+#endif
+
+#undef matcher
+#undef fast
+#undef slow
+#undef dissect
+#undef backref
+#undef step
+#undef print
+#undef at
+#undef match
diff --git a/src/regex/re_main.c b/src/regex/re_main.c
new file mode 100644
index 0000000000..0221e7713d
--- /dev/null
+++ b/src/regex/re_main.c
@@ -0,0 +1,510 @@
+#include
+#include
+#include
+#include
+#include
+
+#include "main.ih"
+
+char *progname;
+int debug = 0;
+int line = 0;
+int status = 0;
+
+int copts = REG_EXTENDED;
+int eopts = 0;
+regoff_t startoff = 0;
+regoff_t endoff = 0;
+
+
+extern int split();
+extern void regprint();
+
+/*
+ - main - do the simple case, hand off to regress() for regression
+ */
+main(argc, argv)
+int argc;
+char *argv[];
+{
+ regex_t re;
+# define NS 10
+ regmatch_t subs[NS];
+ char erbuf[100];
+ int err;
+ size_t len;
+ int c;
+ int errflg = 0;
+ register int i;
+ extern int optind;
+ extern char *optarg;
+
+ progname = argv[0];
+
+ while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
+ switch (c) {
+ case 'c': /* compile options */
+ copts = options('c', optarg);
+ break;
+ case 'e': /* execute options */
+ eopts = options('e', optarg);
+ break;
+ case 'S': /* start offset */
+ startoff = (regoff_t)atoi(optarg);
+ break;
+ case 'E': /* end offset */
+ endoff = (regoff_t)atoi(optarg);
+ break;
+ case 'x': /* Debugging. */
+ debug++;
+ break;
+ case '?':
+ default:
+ errflg++;
+ break;
+ }
+ if (errflg) {
+ fprintf(stderr, "usage: %s ", progname);
+ fprintf(stderr, "[-c copt][-C][-d] [re]\n");
+ exit(2);
+ }
+
+ if (optind >= argc) {
+ regress(stdin);
+ exit(status);
+ }
+
+ err = regcomp(&re, argv[optind++], copts);
+ if (err) {
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "error %s, %d/%d `%s'\n",
+ eprint(err), len, sizeof(erbuf), erbuf);
+ exit(status);
+ }
+ regprint(&re, stdout);
+
+ if (optind >= argc) {
+ regfree(&re);
+ exit(status);
+ }
+
+ if (eopts®_STARTEND) {
+ subs[0].rm_so = startoff;
+ subs[0].rm_eo = strlen(argv[optind]) - endoff;
+ }
+ err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
+ if (err) {
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "error %s, %d/%d `%s'\n",
+ eprint(err), len, sizeof(erbuf), erbuf);
+ exit(status);
+ }
+ if (!(copts®_NOSUB)) {
+ len = (int)(subs[0].rm_eo - subs[0].rm_so);
+ if (subs[0].rm_so != -1) {
+ if (len != 0)
+ printf("match `%.*s'\n", len,
+ argv[optind] + subs[0].rm_so);
+ else
+ printf("match `'@%.1s\n",
+ argv[optind] + subs[0].rm_so);
+ }
+ for (i = 1; i < NS; i++)
+ if (subs[i].rm_so != -1)
+ printf("(%d) `%.*s'\n", i,
+ (int)(subs[i].rm_eo - subs[i].rm_so),
+ argv[optind] + subs[i].rm_so);
+ }
+ exit(status);
+}
+
+/*
+ - regress - main loop of regression test
+ == void regress(FILE *in);
+ */
+void
+regress(in)
+FILE *in;
+{
+ char inbuf[1000];
+# define MAXF 10
+ char *f[MAXF];
+ int nf;
+ int i;
+ char erbuf[100];
+ size_t ne;
+ char *badpat = "invalid regular expression";
+# define SHORT 10
+ char *bpname = "REG_BADPAT";
+ regex_t re;
+
+ while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
+ line++;
+ if (inbuf[0] == '#' || inbuf[0] == '\n')
+ continue; /* NOTE CONTINUE */
+ inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
+ if (debug)
+ fprintf(stdout, "%d:\n", line);
+ nf = split(inbuf, f, MAXF, "\t\t");
+ if (nf < 3) {
+ fprintf(stderr, "bad input, line %d\n", line);
+ exit(1);
+ }
+ for (i = 0; i < nf; i++)
+ if (strcmp(f[i], "\"\"") == 0)
+ f[i] = "";
+ if (nf <= 3)
+ f[3] = NULL;
+ if (nf <= 4)
+ f[4] = NULL;
+ try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
+ if (opt('&', f[1])) /* try with either type of RE */
+ try(f[0], f[1], f[2], f[3], f[4],
+ options('c', f[1]) &~ REG_EXTENDED);
+ }
+
+ ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+ if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
+ fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
+ erbuf, badpat);
+ status = 1;
+ }
+ ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
+ if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
+ ne != strlen(badpat)+1) {
+ fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
+ erbuf, SHORT-1, badpat);
+ status = 1;
+ }
+ ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+ if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
+ fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
+ erbuf, bpname);
+ status = 1;
+ }
+ re.re_endp = bpname;
+ ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
+ if (atoi(erbuf) != (int)REG_BADPAT) {
+ fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
+ erbuf, (long)REG_BADPAT);
+ status = 1;
+ } else if (ne != strlen(erbuf)+1) {
+ fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
+ erbuf, (long)REG_BADPAT);
+ status = 1;
+ }
+}
+
+/*
+ - try - try it, and report on problems
+ == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
+ */
+void
+try(f0, f1, f2, f3, f4, opts)
+char *f0;
+char *f1;
+char *f2;
+char *f3;
+char *f4;
+int opts; /* may not match f1 */
+{
+ regex_t re;
+# define NSUBS 10
+ regmatch_t subs[NSUBS];
+# define NSHOULD 15
+ char *should[NSHOULD];
+ int nshould;
+ char erbuf[100];
+ int err;
+ int len;
+ char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
+ register int i;
+ char *grump;
+ char f0copy[1000];
+ char f2copy[1000];
+
+ strcpy(f0copy, f0);
+ re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
+ fixstr(f0copy);
+ err = regcomp(&re, f0copy, opts);
+ if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
+ /* unexpected error or wrong error */
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
+ line, type, eprint(err), len,
+ sizeof(erbuf), erbuf);
+ status = 1;
+ } else if (err == 0 && opt('C', f1)) {
+ /* unexpected success */
+ fprintf(stderr, "%d: %s should have given REG_%s\n",
+ line, type, f2);
+ status = 1;
+ err = 1; /* so we won't try regexec */
+ }
+
+ if (err != 0) {
+ regfree(&re);
+ return;
+ }
+
+ strcpy(f2copy, f2);
+ fixstr(f2copy);
+
+ if (options('e', f1)®_STARTEND) {
+ if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
+ fprintf(stderr, "%d: bad STARTEND syntax\n", line);
+ subs[0].rm_so = strchr(f2, '(') - f2 + 1;
+ subs[0].rm_eo = strchr(f2, ')') - f2;
+ }
+ err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
+
+ if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
+ /* unexpected error or wrong error */
+ len = regerror(err, &re, erbuf, sizeof(erbuf));
+ fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
+ line, type, eprint(err), len,
+ sizeof(erbuf), erbuf);
+ status = 1;
+ } else if (err != 0) {
+ /* nothing more to check */
+ } else if (f3 == NULL) {
+ /* unexpected success */
+ fprintf(stderr, "%d: %s exec should have failed\n",
+ line, type);
+ status = 1;
+ err = 1; /* just on principle */
+ } else if (opts®_NOSUB) {
+ /* nothing more to check */
+ } else if ((grump = check(f2, subs[0], f3)) != NULL) {
+ fprintf(stderr, "%d: %s %s\n", line, type, grump);
+ status = 1;
+ err = 1;
+ }
+
+ if (err != 0 || f4 == NULL) {
+ regfree(&re);
+ return;
+ }
+
+ for (i = 1; i < NSHOULD; i++)
+ should[i] = NULL;
+ nshould = split(f4, should+1, NSHOULD-1, ",");
+ if (nshould == 0) {
+ nshould = 1;
+ should[1] = "";
+ }
+ for (i = 1; i < NSUBS; i++) {
+ grump = check(f2, subs[i], should[i]);
+ if (grump != NULL) {
+ fprintf(stderr, "%d: %s $%d %s\n", line,
+ type, i, grump);
+ status = 1;
+ err = 1;
+ }
+ }
+
+ regfree(&re);
+}
+
+/*
+ - options - pick options out of a regression-test string
+ == int options(int type, char *s);
+ */
+int
+options(type, s)
+int type; /* 'c' compile, 'e' exec */
+char *s;
+{
+ register char *p;
+ register int o = (type == 'c') ? copts : eopts;
+ register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
+
+ for (p = s; *p != '\0'; p++)
+ if (strchr(legal, *p) != NULL)
+ switch (*p) {
+ case 'b':
+ o &= ~REG_EXTENDED;
+ break;
+ case 'i':
+ o |= REG_ICASE;
+ break;
+ case 's':
+ o |= REG_NOSUB;
+ break;
+ case 'n':
+ o |= REG_NEWLINE;
+ break;
+ case 'm':
+ o &= ~REG_EXTENDED;
+ o |= REG_NOSPEC;
+ break;
+ case 'p':
+ o |= REG_PEND;
+ break;
+ case '^':
+ o |= REG_NOTBOL;
+ break;
+ case '$':
+ o |= REG_NOTEOL;
+ break;
+ case '#':
+ o |= REG_STARTEND;
+ break;
+ case 't': /* trace */
+ o |= REG_TRACE;
+ break;
+ case 'l': /* force long representation */
+ o |= REG_LARGE;
+ break;
+ case 'r': /* force backref use */
+ o |= REG_BACKR;
+ break;
+ }
+ return(o);
+}
+
+/*
+ - opt - is a particular option in a regression string?
+ == int opt(int c, char *s);
+ */
+int /* predicate */
+opt(c, s)
+int c;
+char *s;
+{
+ return(strchr(s, c) != NULL);
+}
+
+/*
+ - fixstr - transform magic characters in strings
+ == void fixstr(register char *p);
+ */
+void
+fixstr(p)
+register char *p;
+{
+ if (p == NULL)
+ return;
+
+ for (; *p != '\0'; p++)
+ if (*p == 'N')
+ *p = '\n';
+ else if (*p == 'T')
+ *p = '\t';
+ else if (*p == 'S')
+ *p = ' ';
+ else if (*p == 'Z')
+ *p = '\0';
+}
+
+/*
+ - check - check a substring match
+ == char *check(char *str, regmatch_t sub, char *should);
+ */
+char * /* NULL or complaint */
+check(str, sub, should)
+char *str;
+regmatch_t sub;
+char *should;
+{
+ register int len;
+ register int shlen;
+ register char *p;
+ static char grump[500];
+ register char *at = NULL;
+
+ if (should != NULL && strcmp(should, "-") == 0)
+ should = NULL;
+ if (should != NULL && should[0] == '@') {
+ at = should + 1;
+ should = "";
+ }
+
+ /* check rm_so and rm_eo for consistency */
+ if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
+ (sub.rm_so != -1 && sub.rm_eo == -1) ||
+ (sub.rm_so != -1 && sub.rm_so < 0) ||
+ (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
+ sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
+ (long)sub.rm_eo);
+ return(grump);
+ }
+
+ /* check for no match */
+ if (sub.rm_so == -1 && should == NULL)
+ return(NULL);
+ if (sub.rm_so == -1)
+ return("did not match");
+
+ /* check for in range */
+ if (sub.rm_eo > strlen(str)) {
+ sprintf(grump, "start %ld end %ld, past end of string",
+ (long)sub.rm_so, (long)sub.rm_eo);
+ return(grump);
+ }
+
+ len = (int)(sub.rm_eo - sub.rm_so);
+ shlen = (int)strlen(should);
+ p = str + sub.rm_so;
+
+ /* check for not supposed to match */
+ if (should == NULL) {
+ sprintf(grump, "matched `%.*s'", len, p);
+ return(grump);
+ }
+
+ /* check for wrong match */
+ if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
+ sprintf(grump, "matched `%.*s' instead", len, p);
+ return(grump);
+ }
+ if (shlen > 0)
+ return(NULL);
+
+ /* check null match in right place */
+ if (at == NULL)
+ return(NULL);
+ shlen = strlen(at);
+ if (shlen == 0)
+ shlen = 1; /* force check for end-of-string */
+ if (strncmp(p, at, shlen) != 0) {
+ sprintf(grump, "matched null at `%.20s'", p);
+ return(grump);
+ }
+ return(NULL);
+}
+
+/*
+ - eprint - convert error number to name
+ == static char *eprint(int err);
+ */
+static char *
+eprint(err)
+int err;
+{
+ static char epbuf[100];
+ size_t len;
+
+ len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
+ assert(len <= sizeof(epbuf));
+ return(epbuf);
+}
+
+/*
+ - efind - convert error name to number
+ == static int efind(char *name);
+ */
+static int
+efind(name)
+char *name;
+{
+ static char efbuf[100];
+ size_t n;
+ regex_t re;
+
+ sprintf(efbuf, "REG_%s", name);
+ assert(strlen(efbuf) < sizeof(efbuf));
+ re.re_endp = efbuf;
+ (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
+ return(atoi(efbuf));
+}
diff --git a/src/regex/re_syntax.n b/src/regex/re_syntax.n
deleted file mode 100644
index f37bb85abd..0000000000
--- a/src/regex/re_syntax.n
+++ /dev/null
@@ -1,970 +0,0 @@
-'\"
-'\" Copyright (c) 1998 Sun Microsystems, Inc.
-'\" Copyright (c) 1999 Scriptics Corporation
-'\"
-'\" This software is copyrighted by the Regents of the University of
-'\" California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
-'\" Corporation and other parties. The following terms apply to all files
-'\" associated with the software unless explicitly disclaimed in
-'\" individual files.
-'\"
-'\" The authors hereby grant permission to use, copy, modify, distribute,
-'\" and license this software and its documentation for any purpose, provided
-'\" that existing copyright notices are retained in all copies and that this
-'\" notice is included verbatim in any distributions. No written agreement,
-'\" license, or royalty fee is required for any of the authorized uses.
-'\" Modifications to this software may be copyrighted by their authors
-'\" and need not follow the licensing terms described here, provided that
-'\" the new terms are clearly indicated on the first page of each file where
-'\" they apply.
-'\"
-'\" IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
-'\" FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
-'\" ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
-'\" DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
-'\" POSSIBILITY OF SUCH DAMAGE.
-'\"
-'\" THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
-'\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
-'\" FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
-'\" IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
-'\" NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
-'\" MODIFICATIONS.
-'\"
-'\" GOVERNMENT USE: If you are acquiring this software on behalf of the
-'\" U.S. government, the Government shall have only "Restricted Rights"
-'\" in the software and related documentation as defined in the Federal
-'\" Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
-'\" are acquiring the software on behalf of the Department of Defense, the
-'\" software shall be classified as "Commercial Computer Software" and the
-'\" Government shall have only "Restricted Rights" as defined in Clause
-'\" 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
-'\" authors grant the U.S. Government and others acting in its behalf
-'\" permission to use and distribute the software in accordance with the
-'\" terms specified in this license.
-'\"
-'\" RCS: @(#) Id: re_syntax.n,v 1.3 1999/07/14 19:09:36 jpeek Exp
-'\"
-.so man.macros
-.TH re_syntax n "8.1" Tcl "Tcl Built-In Commands"
-.BS
-.SH NAME
-re_syntax \- Syntax of Tcl regular expressions.
-.BE
-
-.SH DESCRIPTION
-.PP
-A \fIregular expression\fR describes strings of characters.
-It's a pattern that matches certain strings and doesn't match others.
-
-.SH "DIFFERENT FLAVORS OF REs"
-Regular expressions (``RE''s), as defined by POSIX, come in two
-flavors: \fIextended\fR REs (``EREs'') and \fIbasic\fR REs (``BREs'').
-EREs are roughly those of the traditional \fIegrep\fR, while BREs are
-roughly those of the traditional \fIed\fR. This implementation adds
-a third flavor, \fIadvanced\fR REs (``AREs''), basically EREs with
-some significant extensions.
-.PP
-This manual page primarily describes AREs. BREs mostly exist for
-backward compatibility in some old programs; they will be discussed at
-the end. POSIX EREs are almost an exact subset of AREs. Features of
-AREs that are not present in EREs will be indicated.
-
-.SH "REGULAR EXPRESSION SYNTAX"
-.PP
-Tcl regular expressions are implemented using the package written by
-Henry Spencer, based on the 1003.2 spec and some (not quite all) of
-the Perl5 extensions (thanks, Henry!). Much of the description of
-regular expressions below is copied verbatim from his manual entry.
-.PP
-An ARE is one or more \fIbranches\fR,
-separated by `\fB|\fR',
-matching anything that matches any of the branches.
-.PP
-A branch is zero or more \fIconstraints\fR or \fIquantified atoms\fR,
-concatenated.
-It matches a match for the first, followed by a match for the second, etc;
-an empty branch matches the empty string.
-.PP
-A quantified atom is an \fIatom\fR possibly followed
-by a single \fIquantifier\fR.
-Without a quantifier, it matches a match for the atom.
-The quantifiers,
-and what a so-quantified atom matches, are:
-.RS 2
-.TP 6
-\fB*\fR
-a sequence of 0 or more matches of the atom
-.TP
-\fB+\fR
-a sequence of 1 or more matches of the atom
-.TP
-\fB?\fR
-a sequence of 0 or 1 matches of the atom
-.TP
-\fB{\fIm\fB}\fR
-a sequence of exactly \fIm\fR matches of the atom
-.TP
-\fB{\fIm\fB,}\fR
-a sequence of \fIm\fR or more matches of the atom
-.TP
-\fB{\fIm\fB,\fIn\fB}\fR
-a sequence of \fIm\fR through \fIn\fR (inclusive) matches of the atom;
-\fIm\fR may not exceed \fIn\fR
-.TP
-\fB*? +? ?? {\fIm\fB}? {\fIm\fB,}? {\fIm\fB,\fIn\fB}?\fR
-\fInon-greedy\fR quantifiers,
-which match the same possibilities,
-but prefer the smallest number rather than the largest number
-of matches (see MATCHING)
-.RE
-.PP
-The forms using
-\fB{\fR and \fB}\fR
-are known as \fIbound\fRs.
-The numbers
-\fIm\fR and \fIn\fR are unsigned decimal integers
-with permissible values from 0 to 255 inclusive.
-.PP
-An atom is one of:
-.RS 2
-.TP 6
-\fB(\fIre\fB)\fR
-(where \fIre\fR is any regular expression)
-matches a match for
-\fIre\fR, with the match noted for possible reporting
-.TP
-\fB(?:\fIre\fB)\fR
-as previous,
-but does no reporting
-(a ``non-capturing'' set of parentheses)
-.TP
-\fB()\fR
-matches an empty string,
-noted for possible reporting
-.TP
-\fB(?:)\fR
-matches an empty string,
-without reporting
-.TP
-\fB[\fIchars\fB]\fR
-a \fIbracket expression\fR,
-matching any one of the \fIchars\fR (see BRACKET EXPRESSIONS for more detail)
-.TP
- \fB.\fR
-matches any single character
-.TP
-\fB\e\fIk\fR
-(where \fIk\fR is a non-alphanumeric character)
-matches that character taken as an ordinary character,
-e.g. \e\e matches a backslash character
-.TP
-\fB\e\fIc\fR
-where \fIc\fR is alphanumeric
-(possibly followed by other characters),
-an \fIescape\fR (AREs only),
-see ESCAPES below
-.TP
-\fB{\fR
-when followed by a character other than a digit,
-matches the left-brace character `\fB{\fR';
-when followed by a digit, it is the beginning of a
-\fIbound\fR (see above)
-.TP
-\fIx\fR
-where \fIx\fR is
-a single character with no other significance, matches that character.
-.RE
-.PP
-A \fIconstraint\fR matches an empty string when specific conditions
-are met.
-A constraint may not be followed by a quantifier.
-The simple constraints are as follows; some more constraints are
-described later, under ESCAPES.
-.RS 2
-.TP 8
-\fB^\fR
-matches at the beginning of a line
-.TP
-\fB$\fR
-matches at the end of a line
-.TP
-\fB(?=\fIre\fB)\fR
-\fIpositive lookahead\fR (AREs only), matches at any point
-where a substring matching \fIre\fR begins
-.TP
-\fB(?!\fIre\fB)\fR
-\fInegative lookahead\fR (AREs only), matches at any point
-where no substring matching \fIre\fR begins
-.RE
-.PP
-The lookahead constraints may not contain back references (see later),
-and all parentheses within them are considered non-capturing.
-.PP
-An RE may not end with `\fB\e\fR'.
-
-.SH "BRACKET EXPRESSIONS"
-A \fIbracket expression\fR is a list of characters enclosed in `\fB[\|]\fR'.
-It normally matches any single character from the list (but see below).
-If the list begins with `\fB^\fR',
-it matches any single character
-(but see below) \fInot\fR from the rest of the list.
-.PP
-If two characters in the list are separated by `\fB\-\fR',
-this is shorthand
-for the full \fIrange\fR of characters between those two (inclusive) in the
-collating sequence,
-e.g.
-\fB[0\-9]\fR
-in ASCII matches any decimal digit.
-Two ranges may not share an
-endpoint, so e.g.
-\fBa\-c\-e\fR
-is illegal.
-Ranges are very collating-sequence-dependent,
-and portable programs should avoid relying on them.
-.PP
-To include a literal
-\fB]\fR
-or
-\fB\-\fR
-in the list,
-the simplest method is to
-enclose it in
-\fB[.\fR and \fB.]\fR
-to make it a collating element (see below).
-Alternatively,
-make it the first character
-(following a possible `\fB^\fR'),
-or (AREs only) precede it with `\fB\e\fR'.
-Alternatively, for `\fB\-\fR',
-make it the last character,
-or the second endpoint of a range.
-To use a literal
-\fB\-\fR
-as the first endpoint of a range,
-make it a collating element
-or (AREs only) precede it with `\fB\e\fR'.
-With the exception of these, some combinations using
-\fB[\fR
-(see next
-paragraphs), and escapes,
-all other special characters lose their
-special significance within a bracket expression.
-.PP
-Within a bracket expression, a collating element (a character,
-a multi-character sequence that collates as if it were a single character,
-or a collating-sequence name for either)
-enclosed in
-\fB[.\fR and \fB.]\fR
-stands for the
-sequence of characters of that collating element.
-The sequence is a single element of the bracket expression's list.
-A bracket expression in a locale that has
-multi-character collating elements
-can thus match more than one character.
-.VS 8.2
-So (insidiously), a bracket expression that starts with \fB^\fR
-can match multi-character collating elements even if none of them
-appear in the bracket expression!
-(\fINote:\fR Tcl currently has no multi-character collating elements.
-This information is only for illustration.)
-.PP
-For example, assume the collating sequence includes a \fBch\fR
-multi-character collating element.
-Then the RE \fB[[.ch.]]*c\fR (zero or more \fBch\fP's followed by \fBc\fP)
-matches the first five characters of `\fBchchcc\fR'.
-Also, the RE \fB[^c]b\fR matches all of `\fBchb\fR'
-(because \fB[^c]\fR matches the multi-character \fBch\fR).
-.VE 8.2
-.PP
-Within a bracket expression, a collating element enclosed in
-\fB[=\fR
-and
-\fB=]\fR
-is an equivalence class, standing for the sequences of characters
-of all collating elements equivalent to that one, including itself.
-(If there are no other equivalent collating elements,
-the treatment is as if the enclosing delimiters were `\fB[.\fR'\&
-and `\fB.]\fR'.)
-For example, if
-\fBo\fR
-and
-\fB\o'o^'\fR
-are the members of an equivalence class,
-then `\fB[[=o=]]\fR', `\fB[[=\o'o^'=]]\fR',
-and `\fB[o\o'o^']\fR'\&
-are all synonymous.
-An equivalence class may not be an endpoint
-of a range.
-.VS 8.2
-(\fINote:\fR
-Tcl currently implements only the Unicode locale.
-It doesn't define any equivalence classes.
-The examples above are just illustrations.)
-.VE 8.2
-.PP
-Within a bracket expression, the name of a \fIcharacter class\fR enclosed
-in
-\fB[:\fR
-and
-\fB:]\fR
-stands for the list of all characters
-(not all collating elements!)
-belonging to that
-class.
-Standard character classes are:
-.PP
-.RS
-.ne 5
-.nf
-.ta 3c
-\fBalpha\fR A letter.
-\fBupper\fR An upper-case letter.
-\fBlower\fR A lower-case letter.
-\fBdigit\fR A decimal digit.
-\fBxdigit\fR A hexadecimal digit.
-\fBalnum\fR An alphanumeric (letter or digit).
-\fBprint\fR An alphanumeric (same as alnum).
-\fBblank\fR A space or tab character.
-\fBspace\fR A character producing white space in displayed text.
-\fBpunct\fR A punctuation character.
-\fBgraph\fR A character with a visible representation.
-\fBcntrl\fR A control character.
-.fi
-.RE
-.PP
-A locale may provide others.
-.VS 8.2
-(Note that the current Tcl implementation has only one locale:
-the Unicode locale.)
-.VE 8.2
-A character class may not be used as an endpoint of a range.
-.PP
-There are two special cases of bracket expressions:
-the bracket expressions
-\fB[[:<:]]\fR
-and
-\fB[[:>:]]\fR
-are constraints, matching empty strings at
-the beginning and end of a word respectively.
-'\" note, discussion of escapes below references this definition of word
-A word is defined as a sequence of
-word characters
-that is neither preceded nor followed by
-word characters.
-A word character is an
-\fIalnum\fR
-character
-or an underscore
-(\fB_\fR).
-These special bracket expressions are deprecated;
-users of AREs should use constraint escapes instead (see below).
-.SH ESCAPES
-Escapes (AREs only), which begin with a
-\fB\e\fR
-followed by an alphanumeric character,
-come in several varieties:
-character entry, class shorthands, constraint escapes, and back references.
-A
-\fB\e\fR
-followed by an alphanumeric character but not constituting
-a valid escape is illegal in AREs.
-In EREs, there are no escapes:
-outside a bracket expression,
-a
-\fB\e\fR
-followed by an alphanumeric character merely stands for that
-character as an ordinary character,
-and inside a bracket expression,
-\fB\e\fR
-is an ordinary character.
-(The latter is the one actual incompatibility between EREs and AREs.)
-.PP
-Character-entry escapes (AREs only) exist to make it easier to specify
-non-printing and otherwise inconvenient characters in REs:
-.RS 2
-.TP 5
-\fB\ea\fR
-alert (bell) character, as in C
-.TP
-\fB\eb\fR
-backspace, as in C
-.TP
-\fB\eB\fR
-synonym for
-\fB\e\fR
-to help reduce backslash doubling in some
-applications where there are multiple levels of backslash processing
-.TP
-\fB\ec\fIX\fR
-(where X is any character) the character whose
-low-order 5 bits are the same as those of
-\fIX\fR,
-and whose other bits are all zero
-.TP
-\fB\ee\fR
-the character whose collating-sequence name
-is `\fBESC\fR',
-or failing that, the character with octal value 033
-.TP
-\fB\ef\fR
-formfeed, as in C
-.TP
-\fB\en\fR
-newline, as in C
-.TP
-\fB\er\fR
-carriage return, as in C
-.TP
-\fB\et\fR
-horizontal tab, as in C
-.TP
-\fB\eu\fIwxyz\fR
-(where
-\fIwxyz\fR
-is exactly four hexadecimal digits)
-the Unicode character
-\fBU+\fIwxyz\fR
-in the local byte ordering
-.TP
-\fB\eU\fIstuvwxyz\fR
-(where
-\fIstuvwxyz\fR
-is exactly eight hexadecimal digits)
-reserved for a somewhat-hypothetical Unicode extension to 32 bits
-.TP
-\fB\ev\fR
-vertical tab, as in C
-are all available.
-.TP
-\fB\ex\fIhhh\fR
-(where
-\fIhhh\fR
-is any sequence of hexadecimal digits)
-the character whose hexadecimal value is
-\fB0x\fIhhh\fR
-(a single character no matter how many hexadecimal digits are used).
-.TP
-\fB\e0\fR
-the character whose value is
-\fB0\fR
-.TP
-\fB\e\fIxy\fR
-(where
-\fIxy\fR
-is exactly two octal digits,
-and is not a
-\fIback reference\fR (see below))
-the character whose octal value is
-\fB0\fIxy\fR
-.TP
-\fB\e\fIxyz\fR
-(where
-\fIxyz\fR
-is exactly three octal digits,
-and is not a
-back reference (see below))
-the character whose octal value is
-\fB0\fIxyz\fR
-.RE
-.PP
-Hexadecimal digits are `\fB0\fR'-`\fB9\fR', `\fBa\fR'-`\fBf\fR',
-and `\fBA\fR'-`\fBF\fR'.
-Octal digits are `\fB0\fR'-`\fB7\fR'.
-.PP
-The character-entry escapes are always taken as ordinary characters.
-For example,
-\fB\e135\fR
-is
-\fB]\fR
-in ASCII,
-but
-\fB\e135\fR
-does not terminate a bracket expression.
-Beware, however, that some applications (e.g., C compilers) interpret
-such sequences themselves before the regular-expression package
-gets to see them, which may require doubling (quadrupling, etc.) the `\fB\e\fR'.
-.PP
-Class-shorthand escapes (AREs only) provide shorthands for certain commonly-used
-character classes:
-.RS 2
-.TP 10
-\fB\ed\fR
-\fB[[:digit:]]\fR
-.TP
-\fB\es\fR
-\fB[[:space:]]\fR
-.TP
-\fB\ew\fR
-\fB[[:alnum:]_]\fR
-(note underscore)
-.TP
-\fB\eD\fR
-\fB[^[:digit:]]\fR
-.TP
-\fB\eS\fR
-\fB[^[:space:]]\fR
-.TP
-\fB\eW\fR
-\fB[^[:alnum:]_]\fR
-(note underscore)
-.RE
-.PP
-Within bracket expressions, `\fB\ed\fR', `\fB\es\fR',
-and `\fB\ew\fR'\&
-lose their outer brackets,
-and `\fB\eD\fR', `\fB\eS\fR',
-and `\fB\eW\fR'\&
-are illegal.
-.VS 8.2
-(So, for example, \fB[a-c\ed]\fR is equivalent to \fB[a-c[:digit:]]\fR.
-Also, \fB[a-c\eD]\fR, which is equivalent to \fB[a-c^[:digit:]]\fR, is illegal.)
-.VE 8.2
-.PP
-A constraint escape (AREs only) is a constraint,
-matching the empty string if specific conditions are met,
-written as an escape:
-.RS 2
-.TP 6
-\fB\eA\fR
-matches only at the beginning of the string
-(see MATCHING, below, for how this differs from `\fB^\fR')
-.TP
-\fB\em\fR
-matches only at the beginning of a word
-.TP
-\fB\eM\fR
-matches only at the end of a word
-.TP
-\fB\ey\fR
-matches only at the beginning or end of a word
-.TP
-\fB\eY\fR
-matches only at a point that is not the beginning or end of a word
-.TP
-\fB\eZ\fR
-matches only at the end of the string
-(see MATCHING, below, for how this differs from `\fB$\fR')
-.TP
-\fB\e\fIm\fR
-(where
-\fIm\fR
-is a nonzero digit) a \fIback reference\fR, see below
-.TP
-\fB\e\fImnn\fR
-(where
-\fIm\fR
-is a nonzero digit, and
-\fInn\fR
-is some more digits,
-and the decimal value
-\fImnn\fR
-is not greater than the number of closing capturing parentheses seen so far)
-a \fIback reference\fR, see below
-.RE
-.PP
-A word is defined as in the specification of
-\fB[[:<:]]\fR
-and
-\fB[[:>:]]\fR
-above.
-Constraint escapes are illegal within bracket expressions.
-.PP
-A back reference (AREs only) matches the same string matched by the parenthesized
-subexpression specified by the number,
-so that (e.g.)
-\fB([bc])\e1\fR
-matches
-\fBbb\fR
-or
-\fBcc\fR
-but not `\fBbc\fR'.
-The subexpression must entirely precede the back reference in the RE.
-Subexpressions are numbered in the order of their leading parentheses.
-Non-capturing parentheses do not define subexpressions.
-.PP
-There is an inherent historical ambiguity between octal character-entry
-escapes and back references, which is resolved by heuristics,
-as hinted at above.
-A leading zero always indicates an octal escape.
-A single non-zero digit, not followed by another digit,
-is always taken as a back reference.
-A multi-digit sequence not starting with a zero is taken as a back
-reference if it comes after a suitable subexpression
-(i.e. the number is in the legal range for a back reference),
-and otherwise is taken as octal.
-.SH "METASYNTAX"
-In addition to the main syntax described above, there are some special
-forms and miscellaneous syntactic facilities available.
-.PP
-Normally the flavor of RE being used is specified by
-application-dependent means.
-However, this can be overridden by a \fIdirector\fR.
-If an RE of any flavor begins with `\fB***:\fR',
-the rest of the RE is an ARE.
-If an RE of any flavor begins with `\fB***=\fR',
-the rest of the RE is taken to be a literal string,
-with all characters considered ordinary characters.
-.PP
-An ARE may begin with \fIembedded options\fR:
-a sequence
-\fB(?\fIxyz\fB)\fR
-(where
-\fIxyz\fR
-is one or more alphabetic characters)
-specifies options affecting the rest of the RE.
-These supplement, and can override,
-any options specified by the application.
-The available option letters are:
-.RS 2
-.TP 3
-\fBb\fR
-rest of RE is a BRE
-.TP 3
-\fBc\fR
-case-sensitive matching (usual default)
-.TP 3
-\fBe\fR
-rest of RE is an ERE
-.TP 3
-\fBi\fR
-case-insensitive matching (see MATCHING, below)
-.TP 3
-\fBm\fR
-historical synonym for
-\fBn\fR
-.TP 3
-\fBn\fR
-newline-sensitive matching (see MATCHING, below)
-.TP 3
-\fBp\fR
-partial newline-sensitive matching (see MATCHING, below)
-.TP 3
-\fBq\fR
-rest of RE is a literal (``quoted'') string, all ordinary characters
-.TP 3
-\fBs\fR
-non-newline-sensitive matching (usual default)
-.TP 3
-\fBt\fR
-tight syntax (usual default; see below)
-.TP 3
-\fBw\fR
-inverse partial newline-sensitive (``weird'') matching (see MATCHING, below)
-.TP 3
-\fBx\fR
-expanded syntax (see below)
-.RE
-.PP
-Embedded options take effect at the
-\fB)\fR
-terminating the sequence.
-They are available only at the start of an ARE,
-and may not be used later within it.
-.PP
-In addition to the usual (\fItight\fR) RE syntax, in which all characters are
-significant, there is an \fIexpanded\fR syntax,
-available in all flavors of RE
-with the \fB-expanded\fR switch, or in AREs with the embedded x option.
-In the expanded syntax,
-white-space characters are ignored
-and all characters between a
-\fB#\fR
-and the following newline (or the end of the RE) are ignored,
-permitting paragraphing and commenting a complex RE.
-There are three exceptions to that basic rule:
-.RS 2
-.PP
-a white-space character or `\fB#\fR' preceded by `\fB\e\fR' is retained
-.PP
-white space or `\fB#\fR' within a bracket expression is retained
-.PP
-white space and comments are illegal within multi-character symbols
-like the ARE `\fB(?:\fR' or the BRE `\fB\e(\fR'
-.RE
-.PP
-Expanded-syntax white-space characters are blank, tab, newline, and
-.VS 8.2
-any character that belongs to the \fIspace\fR character class.
-.VE 8.2
-.PP
-Finally, in an ARE,
-outside bracket expressions, the sequence `\fB(?#\fIttt\fB)\fR'
-(where
-\fIttt\fR
-is any text not containing a `\fB)\fR')
-is a comment,
-completely ignored.
-Again, this is not allowed between the characters of
-multi-character symbols like `\fB(?:\fR'.
-Such comments are more a historical artifact than a useful facility,
-and their use is deprecated;
-use the expanded syntax instead.
-.PP
-\fINone\fR of these metasyntax extensions is available if the application
-(or an initial
-\fB***=\fR
-director)
-has specified that the user's input be treated as a literal string
-rather than as an RE.
-.SH MATCHING
-In the event that an RE could match more than one substring of a given
-string,
-the RE matches the one starting earliest in the string.
-If the RE could match more than one substring starting at that point,
-its choice is determined by its \fIpreference\fR:
-either the longest substring, or the shortest.
-.PP
-Most atoms, and all constraints, have no preference.
-A parenthesized RE has the same preference (possibly none) as the RE.
-A quantified atom with quantifier
-\fB{\fIm\fB}\fR
-or
-\fB{\fIm\fB}?\fR
-has the same preference (possibly none) as the atom itself.
-A quantified atom with other normal quantifiers (including
-\fB{\fIm\fB,\fIn\fB}\fR
-with
-\fIm\fR
-equal to
-\fIn\fR)
-prefers longest match.
-A quantified atom with other non-greedy quantifiers (including
-\fB{\fIm\fB,\fIn\fB}?\fR
-with
-\fIm\fR
-equal to
-\fIn\fR)
-prefers shortest match.
-A branch has the same preference as the first quantified atom in it
-which has a preference.
-An RE consisting of two or more branches connected by the
-\fB|\fR
-operator prefers longest match.
-.PP
-Subject to the constraints imposed by the rules for matching the whole RE,
-subexpressions also match the longest or shortest possible substrings,
-based on their preferences,
-with subexpressions starting earlier in the RE taking priority over
-ones starting later.
-Note that outer subexpressions thus take priority over
-their component subexpressions.
-.PP
-Note that the quantifiers
-\fB{1,1}\fR
-and
-\fB{1,1}?\fR
-can be used to force longest and shortest preference, respectively,
-on a subexpression or a whole RE.
-.PP
-Match lengths are measured in characters, not collating elements.
-An empty string is considered longer than no match at all.
-For example,
-\fBbb*\fR
-matches the three middle characters of `\fBabbbc\fR',
-\fB(week|wee)(night|knights)\fR
-matches all ten characters of `\fBweeknights\fR',
-when
-\fB(.*).*\fR
-is matched against
-\fBabc\fR
-the parenthesized subexpression
-matches all three characters, and
-when
-\fB(a*)*\fR
-is matched against
-\fBbc\fR
-both the whole RE and the parenthesized
-subexpression match an empty string.
-.PP
-If case-independent matching is specified,
-the effect is much as if all case distinctions had vanished from the
-alphabet.
-When an alphabetic that exists in multiple cases appears as an
-ordinary character outside a bracket expression, it is effectively
-transformed into a bracket expression containing both cases,
-so that
-\fBx\fR
-becomes `\fB[xX]\fR'.
-When it appears inside a bracket expression, all case counterparts
-of it are added to the bracket expression, so that
-\fB[x]\fR
-becomes
-\fB[xX]\fR
-and
-\fB[^x]\fR
-becomes `\fB[^xX]\fR'.
-.PP
-If newline-sensitive matching is specified, \fB.\fR
-and bracket expressions using
-\fB^\fR
-will never match the newline character
-(so that matches will never cross newlines unless the RE
-explicitly arranges it)
-and
-\fB^\fR
-and
-\fB$\fR
-will match the empty string after and before a newline
-respectively, in addition to matching at beginning and end of string
-respectively.
-ARE
-\fB\eA\fR
-and
-\fB\eZ\fR
-continue to match beginning or end of string \fIonly\fR.
-.PP
-If partial newline-sensitive matching is specified,
-this affects \fB.\fR
-and bracket expressions
-as with newline-sensitive matching, but not
-\fB^\fR
-and `\fB$\fR'.
-.PP
-If inverse partial newline-sensitive matching is specified,
-this affects
-\fB^\fR
-and
-\fB$\fR
-as with
-newline-sensitive matching,
-but not \fB.\fR
-and bracket expressions.
-This isn't very useful but is provided for symmetry.
-.SH "LIMITS AND COMPATIBILITY"
-No particular limit is imposed on the length of REs.
-Programs intended to be highly portable should not employ REs longer
-than 256 bytes,
-as a POSIX-compliant implementation can refuse to accept such REs.
-.PP
-The only feature of AREs that is actually incompatible with
-POSIX EREs is that
-\fB\e\fR
-does not lose its special
-significance inside bracket expressions.
-All other ARE features use syntax which is illegal or has
-undefined or unspecified effects in POSIX EREs;
-the
-\fB***\fR
-syntax of directors likewise is outside the POSIX
-syntax for both BREs and EREs.
-.PP
-Many of the ARE extensions are borrowed from Perl, but some have
-been changed to clean them up, and a few Perl extensions are not present.
-Incompatibilities of note include `\fB\eb\fR', `\fB\eB\fR',
-the lack of special treatment for a trailing newline,
-the addition of complemented bracket expressions to the things
-affected by newline-sensitive matching,
-the restrictions on parentheses and back references in lookahead constraints,
-and the longest/shortest-match (rather than first-match) matching semantics.
-.PP
-The matching rules for REs containing both normal and non-greedy quantifiers
-have changed since early beta-test versions of this package.
-(The new rules are much simpler and cleaner,
-but don't work as hard at guessing the user's real intentions.)
-.PP
-Henry Spencer's original 1986 \fIregexp\fR package,
-still in widespread use (e.g., in pre-8.1 releases of Tcl),
-implemented an early version of today's EREs.
-There are four incompatibilities between \fIregexp\fR's near-EREs
-(`RREs' for short) and AREs.
-In roughly increasing order of significance:
-.PP
-.RS
-In AREs,
-\fB\e\fR
-followed by an alphanumeric character is either an
-escape or an error,
-while in RREs, it was just another way of writing the
-alphanumeric.
-This should not be a problem because there was no reason to write
-such a sequence in RREs.
-.PP
-\fB{\fR
-followed by a digit in an ARE is the beginning of a bound,
-while in RREs,
-\fB{\fR
-was always an ordinary character.
-Such sequences should be rare,
-and will often result in an error because following characters
-will not look like a valid bound.
-.PP
-In AREs,
-\fB\e\fR
-remains a special character within `\fB[\|]\fR',
-so a literal
-\fB\e\fR
-within
-\fB[\|]\fR
-must be written `\fB\e\e\fR'.
-\fB\e\e\fR
-also gives a literal
-\fB\e\fR
-within
-\fB[\|]\fR
-in RREs,
-but only truly paranoid programmers routinely doubled the backslash.
-.PP
-AREs report the longest/shortest match for the RE,
-rather than the first found in a specified search order.
-This may affect some RREs which were written in the expectation that
-the first match would be reported.
-(The careful crafting of RREs to optimize the search order for fast
-matching is obsolete (AREs examine all possible matches
-in parallel, and their performance is largely insensitive to their
-complexity) but cases where the search order was exploited to deliberately
-find a match which was \fInot\fR the longest/shortest will need rewriting.)
-.RE
-
-.SH "BASIC REGULAR EXPRESSIONS"
-BREs differ from EREs in several respects. `\fB|\fR', `\fB+\fR',
-and
-\fB?\fR
-are ordinary characters and there is no equivalent
-for their functionality.
-The delimiters for bounds are
-\fB\e{\fR
-and `\fB\e}\fR',
-with
-\fB{\fR
-and
-\fB}\fR
-by themselves ordinary characters.
-The parentheses for nested subexpressions are
-\fB\e(\fR
-and `\fB\e)\fR',
-with
-\fB(\fR
-and
-\fB)\fR
-by themselves ordinary characters.
-\fB^\fR
-is an ordinary character except at the beginning of the
-RE or the beginning of a parenthesized subexpression,
-\fB$\fR
-is an ordinary character except at the end of the
-RE or the end of a parenthesized subexpression,
-and
-\fB*\fR
-is an ordinary character if it appears at the beginning of the
-RE or the beginning of a parenthesized subexpression
-(after a possible leading `\fB^\fR').
-Finally,
-single-digit back references are available,
-and
-\fB\e<\fR
-and
-\fB\e>\fR
-are synonyms for
-\fB[[:<:]]\fR
-and
-\fB[[:>:]]\fR
-respectively;
-no other escapes are available.
-
-.SH "SEE ALSO"
-RegExp(3), regexp(n), regsub(n), lsearch(n), switch(n), text(n)
-
-.SH KEYWORDS
-match, regular expression, string
diff --git a/src/regex/regc_color.c b/src/regex/regc_color.c
deleted file mode 100644
index 5376af2ed5..0000000000
--- a/src/regex/regc_color.c
+++ /dev/null
@@ -1,780 +0,0 @@
-/*
- * colorings of characters
- * This file is #included by regcomp.c.
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header$
- *
- *
- * Note that there are some incestuous relationships between this code and
- * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
- */
-
-
-
-#define CISERR() VISERR(cm->v)
-#define CERR(e) VERR(cm->v, (e))
-
-
-
-/*
- * initcm - set up new colormap
- */
-static void
-initcm(struct vars * v,
- struct colormap * cm)
-{
- int i;
- int j;
- union tree *t;
- union tree *nextt;
- struct colordesc *cd;
-
- cm->magic = CMMAGIC;
- cm->v = v;
-
- cm->ncds = NINLINECDS;
- cm->cd = cm->cdspace;
- cm->max = 0;
- cm->free = 0;
-
- cd = cm->cd; /* cm->cd[WHITE] */
- cd->sub = NOSUB;
- cd->arcs = NULL;
- cd->flags = 0;
- cd->nchrs = CHR_MAX - CHR_MIN + 1;
-
- /* upper levels of tree */
- for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--)
- {
- nextt = t + 1;
- for (i = BYTTAB - 1; i >= 0; i--)
- t->tptr[i] = nextt;
- }
- /* bottom level is solid white */
- t = &cm->tree[NBYTS - 1];
- for (i = BYTTAB - 1; i >= 0; i--)
- t->tcolor[i] = WHITE;
- cd->block = t;
-}
-
-/*
- * freecm - free dynamically-allocated things in a colormap
- */
-static void
-freecm(struct colormap * cm)
-{
- size_t i;
- union tree *cb;
-
- cm->magic = 0;
- if (NBYTS > 1)
- cmtreefree(cm, cm->tree, 0);
- for (i = 1; i <= cm->max; i++) /* skip WHITE */
- if (!UNUSEDCOLOR(&cm->cd[i]))
- {
- cb = cm->cd[i].block;
- if (cb != NULL)
- FREE(cb);
- }
- if (cm->cd != cm->cdspace)
- FREE(cm->cd);
-}
-
-/*
- * cmtreefree - free a non-terminal part of a colormap tree
- */
-static void
-cmtreefree(struct colormap * cm,
- union tree * tree,
- int level) /* level number (top == 0) of this block */
-{
- int i;
- union tree *t;
- union tree *fillt = &cm->tree[level + 1];
- union tree *cb;
-
- assert(level < NBYTS - 1); /* this level has pointers */
- for (i = BYTTAB - 1; i >= 0; i--)
- {
- t = tree->tptr[i];
- assert(t != NULL);
- if (t != fillt)
- {
- if (level < NBYTS - 2)
- { /* more pointer blocks below */
- cmtreefree(cm, t, level + 1);
- FREE(t);
- }
- else
- { /* color block below */
- cb = cm->cd[t->tcolor[0]].block;
- if (t != cb) /* not a solid block */
- FREE(t);
- }
- }
- }
-}
-
-/*
- * setcolor - set the color of a character in a colormap
- */
-static color /* previous color */
-setcolor(struct colormap * cm,
- chr c,
- pcolor co)
-{
- uchr uc = c;
- int shift;
- int level;
- int b;
- int bottom;
- union tree *t;
- union tree *newt;
- union tree *fillt;
- union tree *lastt;
- union tree *cb;
- color prev;
-
- assert(cm->magic == CMMAGIC);
- if (CISERR() || co == COLORLESS)
- return COLORLESS;
-
- t = cm->tree;
- for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0;
- level++, shift -= BYTBITS)
- {
- b = (uc >> shift) & BYTMASK;
- lastt = t;
- t = lastt->tptr[b];
- assert(t != NULL);
- fillt = &cm->tree[level + 1];
- bottom = (shift <= BYTBITS) ? 1 : 0;
- cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt;
- if (t == fillt || t == cb)
- { /* must allocate a new block */
- newt = (union tree *) MALLOC((bottom) ?
- sizeof(struct colors) : sizeof(struct ptrs));
- if (newt == NULL)
- {
- CERR(REG_ESPACE);
- return COLORLESS;
- }
- if (bottom)
- memcpy(VS(newt->tcolor), VS(t->tcolor),
- BYTTAB * sizeof(color));
- else
- memcpy(VS(newt->tptr), VS(t->tptr),
- BYTTAB * sizeof(union tree *));
- t = newt;
- lastt->tptr[b] = t;
- }
- }
-
- b = uc & BYTMASK;
- prev = t->tcolor[b];
- t->tcolor[b] = (color) co;
- return prev;
-}
-
-/*
- * maxcolor - report largest color number in use
- */
-static color
-maxcolor(struct colormap * cm)
-{
- if (CISERR())
- return COLORLESS;
-
- return (color) cm->max;
-}
-
-/*
- * newcolor - find a new color (must be subject of setcolor at once)
- * Beware: may relocate the colordescs.
- */
-static color /* COLORLESS for error */
-newcolor(struct colormap * cm)
-{
- struct colordesc *cd;
- struct colordesc *new;
- size_t n;
-
- if (CISERR())
- return COLORLESS;
-
- if (cm->free != 0)
- {
- assert(cm->free > 0);
- assert((size_t) cm->free < cm->ncds);
- cd = &cm->cd[cm->free];
- assert(UNUSEDCOLOR(cd));
- assert(cd->arcs == NULL);
- cm->free = cd->sub;
- }
- else if (cm->max < cm->ncds - 1)
- {
- cm->max++;
- cd = &cm->cd[cm->max];
- }
- else
- {
- /* oops, must allocate more */
- n = cm->ncds * 2;
- if (cm->cd == cm->cdspace)
- {
- new = (struct colordesc *) MALLOC(n *
- sizeof(struct colordesc));
- if (new != NULL)
- memcpy(VS(new), VS(cm->cdspace), cm->ncds *
- sizeof(struct colordesc));
- }
- else
- new = (struct colordesc *) REALLOC(cm->cd,
- n * sizeof(struct colordesc));
- if (new == NULL)
- {
- CERR(REG_ESPACE);
- return COLORLESS;
- }
- cm->cd = new;
- cm->ncds = n;
- assert(cm->max < cm->ncds - 1);
- cm->max++;
- cd = &cm->cd[cm->max];
- }
-
- cd->nchrs = 0;
- cd->sub = NOSUB;
- cd->arcs = NULL;
- cd->flags = 0;
- cd->block = NULL;
-
- return (color) (cd - cm->cd);
-}
-
-/*
- * freecolor - free a color (must have no arcs or subcolor)
- */
-static void
-freecolor(struct colormap * cm,
- pcolor co)
-{
- struct colordesc *cd = &cm->cd[co];
- color pco,
- nco; /* for freelist scan */
-
- assert(co >= 0);
- if (co == WHITE)
- return;
-
- assert(cd->arcs == NULL);
- assert(cd->sub == NOSUB);
- assert(cd->nchrs == 0);
- cd->flags = FREECOL;
- if (cd->block != NULL)
- {
- FREE(cd->block);
- cd->block = NULL; /* just paranoia */
- }
-
- if ((size_t) co == cm->max)
- {
- while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max]))
- cm->max--;
- assert(cm->free >= 0);
- while ((size_t) cm->free > cm->max)
- cm->free = cm->cd[cm->free].sub;
- if (cm->free > 0)
- {
- assert(cm->free < cm->max);
- pco = cm->free;
- nco = cm->cd[pco].sub;
- while (nco > 0)
- if ((size_t) nco > cm->max)
- {
- /* take this one out of freelist */
- nco = cm->cd[nco].sub;
- cm->cd[pco].sub = nco;
- }
- else
- {
- assert(nco < cm->max);
- pco = nco;
- nco = cm->cd[pco].sub;
- }
- }
- }
- else
- {
- cd->sub = cm->free;
- cm->free = (color) (cd - cm->cd);
- }
-}
-
-/*
- * pseudocolor - allocate a false color, to be managed by other means
- */
-static color
-pseudocolor(struct colormap * cm)
-{
- color co;
-
- co = newcolor(cm);
- if (CISERR())
- return COLORLESS;
- cm->cd[co].nchrs = 1;
- cm->cd[co].flags = PSEUDO;
- return co;
-}
-
-/*
- * subcolor - allocate a new subcolor (if necessary) to this chr
- */
-static color
-subcolor(struct colormap * cm, chr c)
-{
- color co; /* current color of c */
- color sco; /* new subcolor */
-
- co = GETCOLOR(cm, c);
- sco = newsub(cm, co);
- if (CISERR())
- return COLORLESS;
- assert(sco != COLORLESS);
-
- if (co == sco) /* already in an open subcolor */
- return co; /* rest is redundant */
- cm->cd[co].nchrs--;
- cm->cd[sco].nchrs++;
- setcolor(cm, c, sco);
- return sco;
-}
-
-/*
- * newsub - allocate a new subcolor (if necessary) for a color
- */
-static color
-newsub(struct colormap * cm,
- pcolor co)
-{
- color sco; /* new subcolor */
-
- sco = cm->cd[co].sub;
- if (sco == NOSUB)
- { /* color has no open subcolor */
- if (cm->cd[co].nchrs == 1) /* optimization */
- return co;
- sco = newcolor(cm); /* must create subcolor */
- if (sco == COLORLESS)
- {
- assert(CISERR());
- return COLORLESS;
- }
- cm->cd[co].sub = sco;
- cm->cd[sco].sub = sco; /* open subcolor points to self */
- }
- assert(sco != NOSUB);
-
- return sco;
-}
-
-/*
- * subrange - allocate new subcolors to this range of chrs, fill in arcs
- */
-static void
-subrange(struct vars * v,
- chr from,
- chr to,
- struct state * lp,
- struct state * rp)
-{
- uchr uf;
- int i;
-
- assert(from <= to);
-
- /* first, align "from" on a tree-block boundary */
- uf = (uchr) from;
- i = (int) (((uf + BYTTAB - 1) & (uchr) ~BYTMASK) - uf);
- for (; from <= to && i > 0; i--, from++)
- newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
- if (from > to) /* didn't reach a boundary */
- return;
-
- /* deal with whole blocks */
- for (; to - from >= BYTTAB; from += BYTTAB)
- subblock(v, from, lp, rp);
-
- /* clean up any remaining partial table */
- for (; from <= to; from++)
- newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
-}
-
-/*
- * subblock - allocate new subcolors for one tree block of chrs, fill in arcs
- */
-static void
-subblock(struct vars * v,
- chr start, /* first of BYTTAB chrs */
- struct state * lp,
- struct state * rp)
-{
- uchr uc = start;
- struct colormap *cm = v->cm;
- int shift;
- int level;
- int i;
- int b;
- union tree *t;
- union tree *cb;
- union tree *fillt;
- union tree *lastt;
- int previ;
- int ndone;
- color co;
- color sco;
-
- assert((uc % BYTTAB) == 0);
-
- /* find its color block, making new pointer blocks as needed */
- t = cm->tree;
- fillt = NULL;
- for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0;
- level++, shift -= BYTBITS)
- {
- b = (uc >> shift) & BYTMASK;
- lastt = t;
- t = lastt->tptr[b];
- assert(t != NULL);
- fillt = &cm->tree[level + 1];
- if (t == fillt && shift > BYTBITS)
- { /* need new ptr block */
- t = (union tree *) MALLOC(sizeof(struct ptrs));
- if (t == NULL)
- {
- CERR(REG_ESPACE);
- return;
- }
- memcpy(VS(t->tptr), VS(fillt->tptr),
- BYTTAB * sizeof(union tree *));
- lastt->tptr[b] = t;
- }
- }
-
- /* special cases: fill block or solid block */
- co = t->tcolor[0];
- cb = cm->cd[co].block;
- if (t == fillt || t == cb)
- {
- /* either way, we want a subcolor solid block */
- sco = newsub(cm, co);
- t = cm->cd[sco].block;
- if (t == NULL)
- { /* must set it up */
- t = (union tree *) MALLOC(sizeof(struct colors));
- if (t == NULL)
- {
- CERR(REG_ESPACE);
- return;
- }
- for (i = 0; i < BYTTAB; i++)
- t->tcolor[i] = sco;
- cm->cd[sco].block = t;
- }
- /* find loop must have run at least once */
- lastt->tptr[b] = t;
- newarc(v->nfa, PLAIN, sco, lp, rp);
- cm->cd[co].nchrs -= BYTTAB;
- cm->cd[sco].nchrs += BYTTAB;
- return;
- }
-
- /* general case, a mixed block to be altered */
- i = 0;
- while (i < BYTTAB)
- {
- co = t->tcolor[i];
- sco = newsub(cm, co);
- newarc(v->nfa, PLAIN, sco, lp, rp);
- previ = i;
- do
- {
- t->tcolor[i++] = sco;
- } while (i < BYTTAB && t->tcolor[i] == co);
- ndone = i - previ;
- cm->cd[co].nchrs -= ndone;
- cm->cd[sco].nchrs += ndone;
- }
-}
-
-/*
- * okcolors - promote subcolors to full colors
- */
-static void
-okcolors(struct nfa * nfa,
- struct colormap * cm)
-{
- struct colordesc *cd;
- struct colordesc *end = CDEND(cm);
- struct colordesc *scd;
- struct arc *a;
- color co;
- color sco;
-
- for (cd = cm->cd, co = 0; cd < end; cd++, co++)
- {
- sco = cd->sub;
- if (UNUSEDCOLOR(cd) || sco == NOSUB)
- {
- /* has no subcolor, no further action */
- }
- else if (sco == co)
- {
- /* is subcolor, let parent deal with it */
- }
- else if (cd->nchrs == 0)
- {
- /* parent empty, its arcs change color to subcolor */
- cd->sub = NOSUB;
- scd = &cm->cd[sco];
- assert(scd->nchrs > 0);
- assert(scd->sub == sco);
- scd->sub = NOSUB;
- while ((a = cd->arcs) != NULL)
- {
- assert(a->co == co);
- /* uncolorchain(cm, a); */
- cd->arcs = a->colorchain;
- a->co = sco;
- /* colorchain(cm, a); */
- a->colorchain = scd->arcs;
- scd->arcs = a;
- }
- freecolor(cm, co);
- }
- else
- {
- /* parent's arcs must gain parallel subcolor arcs */
- cd->sub = NOSUB;
- scd = &cm->cd[sco];
- assert(scd->nchrs > 0);
- assert(scd->sub == sco);
- scd->sub = NOSUB;
- for (a = cd->arcs; a != NULL; a = a->colorchain)
- {
- assert(a->co == co);
- newarc(nfa, a->type, sco, a->from, a->to);
- }
- }
- }
-}
-
-/*
- * colorchain - add this arc to the color chain of its color
- */
-static void
-colorchain(struct colormap * cm,
- struct arc * a)
-{
- struct colordesc *cd = &cm->cd[a->co];
-
- a->colorchain = cd->arcs;
- cd->arcs = a;
-}
-
-/*
- * uncolorchain - delete this arc from the color chain of its color
- */
-static void
-uncolorchain(struct colormap * cm,
- struct arc * a)
-{
- struct colordesc *cd = &cm->cd[a->co];
- struct arc *aa;
-
- aa = cd->arcs;
- if (aa == a) /* easy case */
- cd->arcs = a->colorchain;
- else
- {
- for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain)
- continue;
- assert(aa != NULL);
- aa->colorchain = a->colorchain;
- }
- a->colorchain = NULL; /* paranoia */
-}
-
-/*
- * singleton - is this character in its own color?
- */
-static int /* predicate */
-singleton(struct colormap * cm,
- chr c)
-{
- color co; /* color of c */
-
- co = GETCOLOR(cm, c);
- if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB)
- return 1;
- return 0;
-}
-
-/*
- * rainbow - add arcs of all full colors (but one) between specified states
- */
-static void
-rainbow(struct nfa * nfa,
- struct colormap * cm,
- int type,
- pcolor but, /* COLORLESS if no exceptions */
- struct state * from,
- struct state * to)
-{
- struct colordesc *cd;
- struct colordesc *end = CDEND(cm);
- color co;
-
- for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
- if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
- !(cd->flags & PSEUDO))
- newarc(nfa, type, co, from, to);
-}
-
-/*
- * colorcomplement - add arcs of complementary colors
- *
- * The calling sequence ought to be reconciled with cloneouts().
- */
-static void
-colorcomplement(struct nfa * nfa,
- struct colormap * cm,
- int type,
- struct state * of, /* complements of this guy's PLAIN
- * outarcs */
- struct state * from,
- struct state * to)
-{
- struct colordesc *cd;
- struct colordesc *end = CDEND(cm);
- color co;
-
- assert(of != from);
- for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
- if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO))
- if (findarc(of, PLAIN, co) == NULL)
- newarc(nfa, type, co, from, to);
-}
-
-
-#ifdef REG_DEBUG
-
-/*
- * dumpcolors - debugging output
- */
-static void
-dumpcolors(struct colormap * cm,
- FILE *f)
-{
- struct colordesc *cd;
- struct colordesc *end;
- color co;
- chr c;
- char *has;
-
- fprintf(f, "max %ld\n", (long) cm->max);
- if (NBYTS > 1)
- fillcheck(cm, cm->tree, 0, f);
- end = CDEND(cm);
- for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */
- if (!UNUSEDCOLOR(cd))
- {
- assert(cd->nchrs > 0);
- has = (cd->block != NULL) ? "#" : "";
- if (cd->flags & PSEUDO)
- fprintf(f, "#%2ld%s(ps): ", (long) co, has);
- else
- fprintf(f, "#%2ld%s(%2d): ", (long) co,
- has, cd->nchrs);
- /* it's hard to do this more efficiently */
- for (c = CHR_MIN; c < CHR_MAX; c++)
- if (GETCOLOR(cm, c) == co)
- dumpchr(c, f);
- assert(c == CHR_MAX);
- if (GETCOLOR(cm, c) == co)
- dumpchr(c, f);
- fprintf(f, "\n");
- }
-}
-
-/*
- * fillcheck - check proper filling of a tree
- */
-static void
-fillcheck(struct colormap * cm,
- union tree * tree,
- int level, /* level number (top == 0) of this block */
- FILE *f)
-{
- int i;
- union tree *t;
- union tree *fillt = &cm->tree[level + 1];
-
- assert(level < NBYTS - 1); /* this level has pointers */
- for (i = BYTTAB - 1; i >= 0; i--)
- {
- t = tree->tptr[i];
- if (t == NULL)
- fprintf(f, "NULL found in filled tree!\n");
- else if (t == fillt)
- {
- }
- else if (level < NBYTS - 2) /* more pointer blocks below */
- fillcheck(cm, t, level + 1, f);
- }
-}
-
-/*
- * dumpchr - print a chr
- *
- * Kind of char-centric but works well enough for debug use.
- */
-static void
-dumpchr(chr c,
- FILE *f)
-{
- if (c == '\\')
- fprintf(f, "\\\\");
- else if (c > ' ' && c <= '~')
- putc((char) c, f);
- else
- fprintf(f, "\\u%04lx", (long) c);
-}
-
-#endif /* REG_DEBUG */
diff --git a/src/regex/regc_cvec.c b/src/regex/regc_cvec.c
deleted file mode 100644
index b6aa8c98f1..0000000000
--- a/src/regex/regc_cvec.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Utility functions for handling cvecs
- * This file is #included by regcomp.c.
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header$
- *
- */
-
-/*
- * newcvec - allocate a new cvec
- */
-static struct cvec *
-newcvec(int nchrs, /* to hold this many chrs... */
- int nranges, /* ... and this many ranges... */
- int nmcces) /* ... and this many MCCEs */
-{
- size_t n;
- size_t nc;
- struct cvec *cv;
-
- nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2;
-
- n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *)
- + nc * sizeof(chr);
- cv = (struct cvec *) MALLOC(n);
- if (cv == NULL)
- return NULL;
- cv->chrspace = nchrs;
- cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE
- * ptrs */
- cv->mccespace = nmcces;
- cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1);
- cv->rangespace = nranges;
- return clearcvec(cv);
-}
-
-/*
- * clearcvec - clear a possibly-new cvec
- * Returns pointer as convenience.
- */
-static struct cvec *
-clearcvec(struct cvec * cv)
-{
- int i;
-
- assert(cv != NULL);
- cv->nchrs = 0;
- assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]);
- cv->nmcces = 0;
- cv->nmccechrs = 0;
- cv->nranges = 0;
- for (i = 0; i < cv->mccespace; i++)
- cv->mcces[i] = NULL;
-
- return cv;
-}
-
-/*
- * addchr - add a chr to a cvec
- */
-static void
-addchr(struct cvec * cv, /* character vector */
- chr c) /* character to add */
-{
- assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
- cv->chrs[cv->nchrs++] = (chr) c;
-}
-
-/*
- * addrange - add a range to a cvec
- */
-static void
-addrange(struct cvec * cv, /* character vector */
- chr from, /* first character of range */
- chr to) /* last character of range */
-{
- assert(cv->nranges < cv->rangespace);
- cv->ranges[cv->nranges * 2] = (chr) from;
- cv->ranges[cv->nranges * 2 + 1] = (chr) to;
- cv->nranges++;
-}
-
-/*
- * addmcce - add an MCCE to a cvec
- */
-static void
-addmcce(struct cvec * cv, /* character vector */
- chr *startp, /* beginning of text */
- chr *endp) /* just past end of text */
-{
- int len;
- int i;
- chr *s;
- chr *d;
-
- if (startp == NULL && endp == NULL)
- return;
- len = endp - startp;
- assert(len > 0);
- assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
- assert(cv->nmcces < cv->mccespace);
- d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
- cv->mcces[cv->nmcces++] = d;
- for (s = startp, i = len; i > 0; s++, i--)
- *d++ = *s;
- *d++ = 0; /* endmarker */
- assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
- cv->nmccechrs += len + 1;
-}
-
-/*
- * haschr - does a cvec contain this chr?
- */
-static int /* predicate */
-haschr(struct cvec * cv, /* character vector */
- chr c) /* character to test for */
-{
- int i;
- chr *p;
-
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
- {
- if (*p == c)
- return 1;
- }
- for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
- {
- if ((*p <= c) && (c <= *(p + 1)))
- return 1;
- }
- return 0;
-}
-
-/*
- * getcvec - get a cvec, remembering it as v->cv
- */
-static struct cvec *
-getcvec(struct vars * v, /* context */
- int nchrs, /* to hold this many chrs... */
- int nranges, /* ... and this many ranges... */
- int nmcces) /* ... and this many MCCEs */
-{
- if (v->cv != NULL && nchrs <= v->cv->chrspace &&
- nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace)
- return clearcvec(v->cv);
-
- if (v->cv != NULL)
- freecvec(v->cv);
- v->cv = newcvec(nchrs, nranges, nmcces);
- if (v->cv == NULL)
- ERR(REG_ESPACE);
-
- return v->cv;
-}
-
-/*
- * freecvec - free a cvec
- */
-static void
-freecvec(struct cvec * cv)
-{
- FREE(cv);
-}
diff --git a/src/regex/regc_lex.c b/src/regex/regc_lex.c
deleted file mode 100644
index a24290d1a1..0000000000
--- a/src/regex/regc_lex.c
+++ /dev/null
@@ -1,1146 +0,0 @@
-/*
- * lexical analyzer
- * This file is #included by regcomp.c.
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header$
- *
- */
-
-/* scanning macros (know about v) */
-#define ATEOS() (v->now >= v->stop)
-#define HAVE(n) (v->stop - v->now >= (n))
-#define NEXT1(c) (!ATEOS() && *v->now == CHR(c))
-#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
-#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \
- *(v->now+1) == CHR(b) && \
- *(v->now+2) == CHR(c))
-#define SET(c) (v->nexttype = (c))
-#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n))
-#define RET(c) return (SET(c), 1)
-#define RETV(c, n) return (SETV(c, n), 1)
-#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */
-#define LASTTYPE(t) (v->lasttype == (t))
-
-/* lexical contexts */
-#define L_ERE 1 /* mainline ERE/ARE */
-#define L_BRE 2 /* mainline BRE */
-#define L_Q 3 /* REG_QUOTE */
-#define L_EBND 4 /* ERE/ARE bound */
-#define L_BBND 5 /* BRE bound */
-#define L_BRACK 6 /* brackets */
-#define L_CEL 7 /* collating element */
-#define L_ECL 8 /* equivalence class */
-#define L_CCL 9 /* character class */
-#define INTOCON(c) (v->lexcon = (c))
-#define INCON(con) (v->lexcon == (con))
-
-/* construct pointer past end of chr array */
-#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr))
-
-/*
- * lexstart - set up lexical stuff, scan leading options
- */
-static void
-lexstart(struct vars * v)
-{
- prefixes(v); /* may turn on new type bits etc. */
- NOERR();
-
- if (v->cflags & REG_QUOTE)
- {
- assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)));
- INTOCON(L_Q);
- }
- else if (v->cflags & REG_EXTENDED)
- {
- assert(!(v->cflags & REG_QUOTE));
- INTOCON(L_ERE);
- }
- else
- {
- assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
- INTOCON(L_BRE);
- }
-
- v->nexttype = EMPTY; /* remember we were at the start */
- next(v); /* set up the first token */
-}
-
-/*
- * prefixes - implement various special prefixes
- */
-static void
-prefixes(struct vars * v)
-{
- /* literal string doesn't get any of this stuff */
- if (v->cflags & REG_QUOTE)
- return;
-
- /* initial "***" gets special things */
- if (HAVE(4) && NEXT3('*', '*', '*'))
- switch (*(v->now + 3))
- {
- case CHR('?'): /* "***?" error, msg shows version */
- ERR(REG_BADPAT);
- return; /* proceed no further */
- break;
- case CHR('='): /* "***=" shifts to literal string */
- NOTE(REG_UNONPOSIX);
- v->cflags |= REG_QUOTE;
- v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE);
- v->now += 4;
- return; /* and there can be no more prefixes */
- break;
- case CHR(':'): /* "***:" shifts to AREs */
- NOTE(REG_UNONPOSIX);
- v->cflags |= REG_ADVANCED;
- v->now += 4;
- break;
- default: /* otherwise *** is just an error */
- ERR(REG_BADRPT);
- return;
- break;
- }
-
- /* BREs and EREs don't get embedded options */
- if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
- return;
-
- /* embedded options (AREs only) */
- if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
- {
- NOTE(REG_UNONPOSIX);
- v->now += 2;
- for (; !ATEOS() && iscalpha(*v->now); v->now++)
- switch (*v->now)
- {
- case CHR('b'): /* BREs (but why???) */
- v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
- break;
- case CHR('c'): /* case sensitive */
- v->cflags &= ~REG_ICASE;
- break;
- case CHR('e'): /* plain EREs */
- v->cflags |= REG_EXTENDED;
- v->cflags &= ~(REG_ADVF | REG_QUOTE);
- break;
- case CHR('i'): /* case insensitive */
- v->cflags |= REG_ICASE;
- break;
- case CHR('m'): /* Perloid synonym for n */
- case CHR('n'): /* \n affects ^ $ . [^ */
- v->cflags |= REG_NEWLINE;
- break;
- case CHR('p'): /* ~Perl, \n affects . [^ */
- v->cflags |= REG_NLSTOP;
- v->cflags &= ~REG_NLANCH;
- break;
- case CHR('q'): /* literal string */
- v->cflags |= REG_QUOTE;
- v->cflags &= ~REG_ADVANCED;
- break;
- case CHR('s'): /* single line, \n ordinary */
- v->cflags &= ~REG_NEWLINE;
- break;
- case CHR('t'): /* tight syntax */
- v->cflags &= ~REG_EXPANDED;
- break;
- case CHR('w'): /* weird, \n affects ^ $ only */
- v->cflags &= ~REG_NLSTOP;
- v->cflags |= REG_NLANCH;
- break;
- case CHR('x'): /* expanded syntax */
- v->cflags |= REG_EXPANDED;
- break;
- default:
- ERR(REG_BADOPT);
- return;
- }
- if (!NEXT1(')'))
- {
- ERR(REG_BADOPT);
- return;
- }
- v->now++;
- if (v->cflags & REG_QUOTE)
- v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
- }
-}
-
-/*
- * lexnest - "call a subroutine", interpolating string at the lexical level
- *
- * Note, this is not a very general facility. There are a number of
- * implicit assumptions about what sorts of strings can be subroutines.
- */
-static void
-lexnest(struct vars * v,
- chr *beginp, /* start of interpolation */
- chr *endp) /* one past end of interpolation */
-{
- assert(v->savenow == NULL); /* only one level of nesting */
- v->savenow = v->now;
- v->savestop = v->stop;
- v->now = beginp;
- v->stop = endp;
-}
-
-/*
- * string constants to interpolate as expansions of things like \d
- */
-static chr backd[] = { /* \d */
- CHR('['), CHR('['), CHR(':'),
- CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']'), CHR(']')
-};
-static chr backD[] = { /* \D */
- CHR('['), CHR('^'), CHR('['), CHR(':'),
- CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']'), CHR(']')
-};
-static chr brbackd[] = { /* \d within brackets */
- CHR('['), CHR(':'),
- CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']')
-};
-static chr backs[] = { /* \s */
- CHR('['), CHR('['), CHR(':'),
- CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']'), CHR(']')
-};
-static chr backS[] = { /* \S */
- CHR('['), CHR('^'), CHR('['), CHR(':'),
- CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']'), CHR(']')
-};
-static chr brbacks[] = { /* \s within brackets */
- CHR('['), CHR(':'),
- CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']')
-};
-static chr backw[] = { /* \w */
- CHR('['), CHR('['), CHR(':'),
- CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']')
-};
-static chr backW[] = { /* \W */
- CHR('['), CHR('^'), CHR('['), CHR(':'),
- CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']')
-};
-static chr brbackw[] = { /* \w within brackets */
- CHR('['), CHR(':'),
- CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_')
-};
-
-/*
- * lexword - interpolate a bracket expression for word characters
- * Possibly ought to inquire whether there is a "word" character class.
- */
-static void
-lexword(struct vars * v)
-{
- lexnest(v, backw, ENDOF(backw));
-}
-
-/*
- * next - get next token
- */
-static int /* 1 normal, 0 failure */
-next(struct vars * v)
-{
- chr c;
-
- /* errors yield an infinite sequence of failures */
- if (ISERR())
- return 0; /* the error has set nexttype to EOS */
-
- /* remember flavor of last token */
- v->lasttype = v->nexttype;
-
- /* REG_BOSONLY */
- if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
- {
- /* at start of a REG_BOSONLY RE */
- RETV(SBEGIN, 0); /* same as \A */
- }
-
- /* if we're nested and we've hit end, return to outer level */
- if (v->savenow != NULL && ATEOS())
- {
- v->now = v->savenow;
- v->stop = v->savestop;
- v->savenow = v->savestop = NULL;
- }
-
- /* skip white space etc. if appropriate (not in literal or []) */
- if (v->cflags & REG_EXPANDED)
- switch (v->lexcon)
- {
- case L_ERE:
- case L_BRE:
- case L_EBND:
- case L_BBND:
- skip(v);
- break;
- }
-
- /* handle EOS, depending on context */
- if (ATEOS())
- {
- switch (v->lexcon)
- {
- case L_ERE:
- case L_BRE:
- case L_Q:
- RET(EOS);
- break;
- case L_EBND:
- case L_BBND:
- FAILW(REG_EBRACE);
- break;
- case L_BRACK:
- case L_CEL:
- case L_ECL:
- case L_CCL:
- FAILW(REG_EBRACK);
- break;
- }
- assert(NOTREACHED);
- }
-
- /* okay, time to actually get a character */
- c = *v->now++;
-
- /* deal with the easy contexts, punt EREs to code below */
- switch (v->lexcon)
- {
- case L_BRE: /* punt BREs to separate function */
- return brenext(v, c);
- break;
- case L_ERE: /* see below */
- break;
- case L_Q: /* literal strings are easy */
- RETV(PLAIN, c);
- break;
- case L_BBND: /* bounds are fairly simple */
- case L_EBND:
- switch (c)
- {
- case CHR('0'):
- case CHR('1'):
- case CHR('2'):
- case CHR('3'):
- case CHR('4'):
- case CHR('5'):
- case CHR('6'):
- case CHR('7'):
- case CHR('8'):
- case CHR('9'):
- RETV(DIGIT, (chr) DIGITVAL(c));
- break;
- case CHR(','):
- RET(',');
- break;
- case CHR('}'): /* ERE bound ends with } */
- if (INCON(L_EBND))
- {
- INTOCON(L_ERE);
- if ((v->cflags & REG_ADVF) && NEXT1('?'))
- {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('}', 0);
- }
- RETV('}', 1);
- }
- else
- FAILW(REG_BADBR);
- break;
- case CHR('\\'): /* BRE bound ends with \} */
- if (INCON(L_BBND) && NEXT1('}'))
- {
- v->now++;
- INTOCON(L_BRE);
- RET('}');
- }
- else
- FAILW(REG_BADBR);
- break;
- default:
- FAILW(REG_BADBR);
- break;
- }
- assert(NOTREACHED);
- break;
- case L_BRACK: /* brackets are not too hard */
- switch (c)
- {
- case CHR(']'):
- if (LASTTYPE('['))
- RETV(PLAIN, c);
- else
- {
- INTOCON((v->cflags & REG_EXTENDED) ?
- L_ERE : L_BRE);
- RET(']');
- }
- break;
- case CHR('\\'):
- NOTE(REG_UBBS);
- if (!(v->cflags & REG_ADVF))
- RETV(PLAIN, c);
- NOTE(REG_UNONPOSIX);
- if (ATEOS())
- FAILW(REG_EESCAPE);
- (DISCARD) lexescape(v);
- switch (v->nexttype)
- { /* not all escapes okay here */
- case PLAIN:
- return 1;
- break;
- case CCLASS:
- switch (v->nextvalue)
- {
- case 'd':
- lexnest(v, brbackd, ENDOF(brbackd));
- break;
- case 's':
- lexnest(v, brbacks, ENDOF(brbacks));
- break;
- case 'w':
- lexnest(v, brbackw, ENDOF(brbackw));
- break;
- default:
- FAILW(REG_EESCAPE);
- break;
- }
- /* lexnest done, back up and try again */
- v->nexttype = v->lasttype;
- return next(v);
- break;
- }
- /* not one of the acceptable escapes */
- FAILW(REG_EESCAPE);
- break;
- case CHR('-'):
- if (LASTTYPE('[') || NEXT1(']'))
- RETV(PLAIN, c);
- else
- RETV(RANGE, c);
- break;
- case CHR('['):
- if (ATEOS())
- FAILW(REG_EBRACK);
- switch (*v->now++)
- {
- case CHR('.'):
- INTOCON(L_CEL);
- /* might or might not be locale-specific */
- RET(COLLEL);
- break;
- case CHR('='):
- INTOCON(L_ECL);
- NOTE(REG_ULOCALE);
- RET(ECLASS);
- break;
- case CHR(':'):
- INTOCON(L_CCL);
- NOTE(REG_ULOCALE);
- RET(CCLASS);
- break;
- default: /* oops */
- v->now--;
- RETV(PLAIN, c);
- break;
- }
- assert(NOTREACHED);
- break;
- default:
- RETV(PLAIN, c);
- break;
- }
- assert(NOTREACHED);
- break;
- case L_CEL: /* collating elements are easy */
- if (c == CHR('.') && NEXT1(']'))
- {
- v->now++;
- INTOCON(L_BRACK);
- RETV(END, '.');
- }
- else
- RETV(PLAIN, c);
- break;
- case L_ECL: /* ditto equivalence classes */
- if (c == CHR('=') && NEXT1(']'))
- {
- v->now++;
- INTOCON(L_BRACK);
- RETV(END, '=');
- }
- else
- RETV(PLAIN, c);
- break;
- case L_CCL: /* ditto character classes */
- if (c == CHR(':') && NEXT1(']'))
- {
- v->now++;
- INTOCON(L_BRACK);
- RETV(END, ':');
- }
- else
- RETV(PLAIN, c);
- break;
- default:
- assert(NOTREACHED);
- break;
- }
-
- /* that got rid of everything except EREs and AREs */
- assert(INCON(L_ERE));
-
- /* deal with EREs and AREs, except for backslashes */
- switch (c)
- {
- case CHR('|'):
- RET('|');
- break;
- case CHR('*'):
- if ((v->cflags & REG_ADVF) && NEXT1('?'))
- {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('*', 0);
- }
- RETV('*', 1);
- break;
- case CHR('+'):
- if ((v->cflags & REG_ADVF) && NEXT1('?'))
- {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('+', 0);
- }
- RETV('+', 1);
- break;
- case CHR('?'):
- if ((v->cflags & REG_ADVF) && NEXT1('?'))
- {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('?', 0);
- }
- RETV('?', 1);
- break;
- case CHR('{'): /* bounds start or plain character */
- if (v->cflags & REG_EXPANDED)
- skip(v);
- if (ATEOS() || !iscdigit(*v->now))
- {
- NOTE(REG_UBRACES);
- NOTE(REG_UUNSPEC);
- RETV(PLAIN, c);
- }
- else
- {
- NOTE(REG_UBOUNDS);
- INTOCON(L_EBND);
- RET('{');
- }
- assert(NOTREACHED);
- break;
- case CHR('('): /* parenthesis, or advanced extension */
- if ((v->cflags & REG_ADVF) && NEXT1('?'))
- {
- NOTE(REG_UNONPOSIX);
- v->now++;
- switch (*v->now++)
- {
- case CHR(':'): /* non-capturing paren */
- RETV('(', 0);
- break;
- case CHR('#'): /* comment */
- while (!ATEOS() && *v->now != CHR(')'))
- v->now++;
- if (!ATEOS())
- v->now++;
- assert(v->nexttype == v->lasttype);
- return next(v);
- break;
- case CHR('='): /* positive lookahead */
- NOTE(REG_ULOOKAHEAD);
- RETV(LACON, 1);
- break;
- case CHR('!'): /* negative lookahead */
- NOTE(REG_ULOOKAHEAD);
- RETV(LACON, 0);
- break;
- default:
- FAILW(REG_BADRPT);
- break;
- }
- assert(NOTREACHED);
- }
- if (v->cflags & REG_NOSUB)
- RETV('(', 0); /* all parens non-capturing */
- else
- RETV('(', 1);
- break;
- case CHR(')'):
- if (LASTTYPE('('))
- NOTE(REG_UUNSPEC);
- RETV(')', c);
- break;
- case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
- if (HAVE(6) && *(v->now + 0) == CHR('[') &&
- *(v->now + 1) == CHR(':') &&
- (*(v->now + 2) == CHR('<') ||
- *(v->now + 2) == CHR('>')) &&
- *(v->now + 3) == CHR(':') &&
- *(v->now + 4) == CHR(']') &&
- *(v->now + 5) == CHR(']'))
- {
- c = *(v->now + 2);
- v->now += 6;
- NOTE(REG_UNONPOSIX);
- RET((c == CHR('<')) ? '<' : '>');
- }
- INTOCON(L_BRACK);
- if (NEXT1('^'))
- {
- v->now++;
- RETV('[', 0);
- }
- RETV('[', 1);
- break;
- case CHR('.'):
- RET('.');
- break;
- case CHR('^'):
- RET('^');
- break;
- case CHR('$'):
- RET('$');
- break;
- case CHR('\\'): /* mostly punt backslashes to code below */
- if (ATEOS())
- FAILW(REG_EESCAPE);
- break;
- default: /* ordinary character */
- RETV(PLAIN, c);
- break;
- }
-
- /* ERE/ARE backslash handling; backslash already eaten */
- assert(!ATEOS());
- if (!(v->cflags & REG_ADVF))
- { /* only AREs have non-trivial escapes */
- if (iscalnum(*v->now))
- {
- NOTE(REG_UBSALNUM);
- NOTE(REG_UUNSPEC);
- }
- RETV(PLAIN, *v->now++);
- }
- (DISCARD) lexescape(v);
- if (ISERR())
- FAILW(REG_EESCAPE);
- if (v->nexttype == CCLASS)
- { /* fudge at lexical level */
- switch (v->nextvalue)
- {
- case 'd':
- lexnest(v, backd, ENDOF(backd));
- break;
- case 'D':
- lexnest(v, backD, ENDOF(backD));
- break;
- case 's':
- lexnest(v, backs, ENDOF(backs));
- break;
- case 'S':
- lexnest(v, backS, ENDOF(backS));
- break;
- case 'w':
- lexnest(v, backw, ENDOF(backw));
- break;
- case 'W':
- lexnest(v, backW, ENDOF(backW));
- break;
- default:
- assert(NOTREACHED);
- FAILW(REG_ASSERT);
- break;
- }
- /* lexnest done, back up and try again */
- v->nexttype = v->lasttype;
- return next(v);
- }
- /* otherwise, lexescape has already done the work */
- return !ISERR();
-}
-
-/*
- * lexescape - parse an ARE backslash escape (backslash already eaten)
- * Note slightly nonstandard use of the CCLASS type code.
- */
-static int /* not actually used, but convenient for
- * RETV */
-lexescape(struct vars * v)
-{
- chr c;
- static chr alert[] = {
- CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
- };
- static chr esc[] = {
- CHR('E'), CHR('S'), CHR('C')
- };
- chr *save;
-
- assert(v->cflags & REG_ADVF);
-
- assert(!ATEOS());
- c = *v->now++;
- if (!iscalnum(c))
- RETV(PLAIN, c);
-
- NOTE(REG_UNONPOSIX);
- switch (c)
- {
- case CHR('a'):
- RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
- break;
- case CHR('A'):
- RETV(SBEGIN, 0);
- break;
- case CHR('b'):
- RETV(PLAIN, CHR('\b'));
- break;
- case CHR('B'):
- RETV(PLAIN, CHR('\\'));
- break;
- case CHR('c'):
- NOTE(REG_UUNPORT);
- if (ATEOS())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, (chr) (*v->now++ & 037));
- break;
- case CHR('d'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'd');
- break;
- case CHR('D'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'D');
- break;
- case CHR('e'):
- NOTE(REG_UUNPORT);
- RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
- break;
- case CHR('f'):
- RETV(PLAIN, CHR('\f'));
- break;
- case CHR('m'):
- RET('<');
- break;
- case CHR('M'):
- RET('>');
- break;
- case CHR('n'):
- RETV(PLAIN, CHR('\n'));
- break;
- case CHR('r'):
- RETV(PLAIN, CHR('\r'));
- break;
- case CHR('s'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 's');
- break;
- case CHR('S'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'S');
- break;
- case CHR('t'):
- RETV(PLAIN, CHR('\t'));
- break;
- case CHR('u'):
- c = lexdigits(v, 16, 4, 4);
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- case CHR('U'):
- c = lexdigits(v, 16, 8, 8);
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- case CHR('v'):
- RETV(PLAIN, CHR('\v'));
- break;
- case CHR('w'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'w');
- break;
- case CHR('W'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'W');
- break;
- case CHR('x'):
- NOTE(REG_UUNPORT);
- c = lexdigits(v, 16, 1, 255); /* REs >255 long outside
- * spec */
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- case CHR('y'):
- NOTE(REG_ULOCALE);
- RETV(WBDRY, 0);
- break;
- case CHR('Y'):
- NOTE(REG_ULOCALE);
- RETV(NWBDRY, 0);
- break;
- case CHR('Z'):
- RETV(SEND, 0);
- break;
- case CHR('1'):
- case CHR('2'):
- case CHR('3'):
- case CHR('4'):
- case CHR('5'):
- case CHR('6'):
- case CHR('7'):
- case CHR('8'):
- case CHR('9'):
- save = v->now;
- v->now--; /* put first digit back */
- c = lexdigits(v, 10, 1, 255); /* REs >255 long outside
- * spec */
- if (ISERR())
- FAILW(REG_EESCAPE);
- /* ugly heuristic (first test is "exactly 1 digit?") */
- if (v->now - save == 0 || (int) c <= v->nsubexp)
- {
- NOTE(REG_UBACKREF);
- RETV(BACKREF, (chr) c);
- }
- /* oops, doesn't look like it's a backref after all... */
- v->now = save;
- /* and fall through into octal number */
- case CHR('0'):
- NOTE(REG_UUNPORT);
- v->now--; /* put first digit back */
- c = lexdigits(v, 8, 1, 3);
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- default:
- assert(iscalpha(c));
- FAILW(REG_EESCAPE); /* unknown alphabetic escape */
- break;
- }
- assert(NOTREACHED);
-}
-
-/*
- * lexdigits - slurp up digits and return chr value
- */
-static chr /* chr value; errors signalled via ERR */
-lexdigits(struct vars * v,
- int base,
- int minlen,
- int maxlen)
-{
- uchr n; /* unsigned to avoid overflow misbehavior */
- int len;
- chr c;
- int d;
- const uchr ub = (uchr) base;
-
- n = 0;
- for (len = 0; len < maxlen && !ATEOS(); len++)
- {
- c = *v->now++;
- switch (c)
- {
- case CHR('0'):
- case CHR('1'):
- case CHR('2'):
- case CHR('3'):
- case CHR('4'):
- case CHR('5'):
- case CHR('6'):
- case CHR('7'):
- case CHR('8'):
- case CHR('9'):
- d = DIGITVAL(c);
- break;
- case CHR('a'):
- case CHR('A'):
- d = 10;
- break;
- case CHR('b'):
- case CHR('B'):
- d = 11;
- break;
- case CHR('c'):
- case CHR('C'):
- d = 12;
- break;
- case CHR('d'):
- case CHR('D'):
- d = 13;
- break;
- case CHR('e'):
- case CHR('E'):
- d = 14;
- break;
- case CHR('f'):
- case CHR('F'):
- d = 15;
- break;
- default:
- v->now--; /* oops, not a digit at all */
- d = -1;
- break;
- }
-
- if (d >= base)
- { /* not a plausible digit */
- v->now--;
- d = -1;
- }
- if (d < 0)
- break; /* NOTE BREAK OUT */
- n = n * ub + (uchr) d;
- }
- if (len < minlen)
- ERR(REG_EESCAPE);
-
- return (chr) n;
-}
-
-/*
- * brenext - get next BRE token
- *
- * This is much like EREs except for all the stupid backslashes and the
- * context-dependency of some things.
- */
-static int /* 1 normal, 0 failure */
-brenext(struct vars * v,
- chr pc)
-{
- chr c = (chr) pc;
-
- switch (c)
- {
- case CHR('*'):
- if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
- RETV(PLAIN, c);
- RET('*');
- break;
- case CHR('['):
- if (HAVE(6) && *(v->now + 0) == CHR('[') &&
- *(v->now + 1) == CHR(':') &&
- (*(v->now + 2) == CHR('<') ||
- *(v->now + 2) == CHR('>')) &&
- *(v->now + 3) == CHR(':') &&
- *(v->now + 4) == CHR(']') &&
- *(v->now + 5) == CHR(']'))
- {
- c = *(v->now + 2);
- v->now += 6;
- NOTE(REG_UNONPOSIX);
- RET((c == CHR('<')) ? '<' : '>');
- }
- INTOCON(L_BRACK);
- if (NEXT1('^'))
- {
- v->now++;
- RETV('[', 0);
- }
- RETV('[', 1);
- break;
- case CHR('.'):
- RET('.');
- break;
- case CHR('^'):
- if (LASTTYPE(EMPTY))
- RET('^');
- if (LASTTYPE('('))
- {
- NOTE(REG_UUNSPEC);
- RET('^');
- }
- RETV(PLAIN, c);
- break;
- case CHR('$'):
- if (v->cflags & REG_EXPANDED)
- skip(v);
- if (ATEOS())
- RET('$');
- if (NEXT2('\\', ')'))
- {
- NOTE(REG_UUNSPEC);
- RET('$');
- }
- RETV(PLAIN, c);
- break;
- case CHR('\\'):
- break; /* see below */
- default:
- RETV(PLAIN, c);
- break;
- }
-
- assert(c == CHR('\\'));
-
- if (ATEOS())
- FAILW(REG_EESCAPE);
-
- c = *v->now++;
- switch (c)
- {
- case CHR('{'):
- INTOCON(L_BBND);
- NOTE(REG_UBOUNDS);
- RET('{');
- break;
- case CHR('('):
- RETV('(', 1);
- break;
- case CHR(')'):
- RETV(')', c);
- break;
- case CHR('<'):
- NOTE(REG_UNONPOSIX);
- RET('<');
- break;
- case CHR('>'):
- NOTE(REG_UNONPOSIX);
- RET('>');
- break;
- case CHR('1'):
- case CHR('2'):
- case CHR('3'):
- case CHR('4'):
- case CHR('5'):
- case CHR('6'):
- case CHR('7'):
- case CHR('8'):
- case CHR('9'):
- NOTE(REG_UBACKREF);
- RETV(BACKREF, (chr) DIGITVAL(c));
- break;
- default:
- if (iscalnum(c))
- {
- NOTE(REG_UBSALNUM);
- NOTE(REG_UUNSPEC);
- }
- RETV(PLAIN, c);
- break;
- }
-
- assert(NOTREACHED);
-}
-
-/*
- * skip - skip white space and comments in expanded form
- */
-static void
-skip(struct vars * v)
-{
- chr *start = v->now;
-
- assert(v->cflags & REG_EXPANDED);
-
- for (;;)
- {
- while (!ATEOS() && iscspace(*v->now))
- v->now++;
- if (ATEOS() || *v->now != CHR('#'))
- break; /* NOTE BREAK OUT */
- assert(NEXT1('#'));
- while (!ATEOS() && *v->now != CHR('\n'))
- v->now++;
- /* leave the newline to be picked up by the iscspace loop */
- }
-
- if (v->now != start)
- NOTE(REG_UNONPOSIX);
-}
-
-/*
- * newline - return the chr for a newline
- *
- * This helps confine use of CHR to this source file.
- */
-static chr
-newline(void)
-{
- return CHR('\n');
-}
-
-/*
- * chrnamed - return the chr known by a given (chr string) name
- *
- * The code is a bit clumsy, but this routine gets only such specialized
- * use that it hardly matters.
- */
-static chr
-chrnamed(struct vars * v,
- chr *startp, /* start of name */
- chr *endp, /* just past end of name */
- chr lastresort) /* what to return if name lookup fails */
-{
- celt c;
- int errsave;
- int e;
- struct cvec *cv;
-
- errsave = v->err;
- v->err = 0;
- c = element(v, startp, endp);
- e = v->err;
- v->err = errsave;
-
- if (e != 0)
- return (chr) lastresort;
-
- cv = range(v, c, c, 0);
- if (cv->nchrs == 0)
- return (chr) lastresort;
- return cv->chrs[0];
-}
diff --git a/src/regex/regc_locale.c b/src/regex/regc_locale.c
deleted file mode 100644
index 4e13b8488b..0000000000
--- a/src/regex/regc_locale.c
+++ /dev/null
@@ -1,838 +0,0 @@
-/*
- * regc_locale.c --
- *
- * This file contains locale-specific regexp routines.
- * This file is #included by regcomp.c.
- *
- * Copyright (c) 1998 by Scriptics Corporation.
- *
- * This software is copyrighted by the Regents of the University of
- * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
- * Corporation and other parties. The following terms apply to all files
- * associated with the software unless explicitly disclaimed in
- * individual files.
- *
- * The authors hereby grant permission to use, copy, modify, distribute,
- * and license this software and its documentation for any purpose, provided
- * that existing copyright notices are retained in all copies and that this
- * notice is included verbatim in any distributions. No written agreement,
- * license, or royalty fee is required for any of the authorized uses.
- * Modifications to this software may be copyrighted by their authors
- * and need not follow the licensing terms described here, provided that
- * the new terms are clearly indicated on the first page of each file where
- * they apply.
- *
- * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
- * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
- * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
- * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
- * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
- * MODIFICATIONS.
- *
- * GOVERNMENT USE: If you are acquiring this software on behalf of the
- * U.S. government, the Government shall have only "Restricted Rights"
- * in the software and related documentation as defined in the Federal
- * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
- * are acquiring the software on behalf of the Department of Defense, the
- * software shall be classified as "Commercial Computer Software" and the
- * Government shall have only "Restricted Rights" as defined in Clause
- * 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
- * authors grant the U.S. Government and others acting in its behalf
- * permission to use and distribute the software in accordance with the
- * terms specified in this license.
- *
- * $Header$
- */
-
-int char_and_wchar_strncmp (const char* cp, const wx_wchar* wp, size_t nNum)
-{
- while(*cp++ == (const char)*wp++ && --nNum){}
-
- return nNum;
-}
-
-/* ASCII character-name table */
-
-static struct cname
-{
- char *name;
- char code;
-} cnames[] =
-
-{
- {
- "NUL", '\0'
- },
- {
- "SOH", '\001'
- },
- {
- "STX", '\002'
- },
- {
- "ETX", '\003'
- },
- {
- "EOT", '\004'
- },
- {
- "ENQ", '\005'
- },
- {
- "ACK", '\006'
- },
- {
- "BEL", '\007'
- },
- {
- "alert", '\007'
- },
- {
- "BS", '\010'
- },
- {
- "backspace", '\b'
- },
- {
- "HT", '\011'
- },
- {
- "tab", '\t'
- },
- {
- "LF", '\012'
- },
- {
- "newline", '\n'
- },
- {
- "VT", '\013'
- },
- {
- "vertical-tab", '\v'
- },
- {
- "FF", '\014'
- },
- {
- "form-feed", '\f'
- },
- {
- "CR", '\015'
- },
- {
- "carriage-return", '\r'
- },
- {
- "SO", '\016'
- },
- {
- "SI", '\017'
- },
- {
- "DLE", '\020'
- },
- {
- "DC1", '\021'
- },
- {
- "DC2", '\022'
- },
- {
- "DC3", '\023'
- },
- {
- "DC4", '\024'
- },
- {
- "NAK", '\025'
- },
- {
- "SYN", '\026'
- },
- {
- "ETB", '\027'
- },
- {
- "CAN", '\030'
- },
- {
- "EM", '\031'
- },
- {
- "SUB", '\032'
- },
- {
- "ESC", '\033'
- },
- {
- "IS4", '\034'
- },
- {
- "FS", '\034'
- },
- {
- "IS3", '\035'
- },
- {
- "GS", '\035'
- },
- {
- "IS2", '\036'
- },
- {
- "RS", '\036'
- },
- {
- "IS1", '\037'
- },
- {
- "US", '\037'
- },
- {
- "space", ' '
- },
- {
- "exclamation-mark", '!'
- },
- {
- "quotation-mark", '"'
- },
- {
- "number-sign", '#'
- },
- {
- "dollar-sign", '$'
- },
- {
- "percent-sign", '%'
- },
- {
- "ampersand", '&'
- },
- {
- "apostrophe", '\''
- },
- {
- "left-parenthesis", '('
- },
- {
- "right-parenthesis", ')'
- },
- {
- "asterisk", '*'
- },
- {
- "plus-sign", '+'
- },
- {
- "comma", ','
- },
- {
- "hyphen", '-'
- },
- {
- "hyphen-minus", '-'
- },
- {
- "period", '.'
- },
- {
- "full-stop", '.'
- },
- {
- "slash", '/'
- },
- {
- "solidus", '/'
- },
- {
- "zero", '0'
- },
- {
- "one", '1'
- },
- {
- "two", '2'
- },
- {
- "three", '3'
- },
- {
- "four", '4'
- },
- {
- "five", '5'
- },
- {
- "six", '6'
- },
- {
- "seven", '7'
- },
- {
- "eight", '8'
- },
- {
- "nine", '9'
- },
- {
- "colon", ':'
- },
- {
- "semicolon", ';'
- },
- {
- "less-than-sign", '<'
- },
- {
- "equals-sign", '='
- },
- {
- "greater-than-sign", '>'
- },
- {
- "question-mark", '?'
- },
- {
- "commercial-at", '@'
- },
- {
- "left-square-bracket", '['
- },
- {
- "backslash", '\\'
- },
- {
- "reverse-solidus", '\\'
- },
- {
- "right-square-bracket", ']'
- },
- {
- "circumflex", '^'
- },
- {
- "circumflex-accent", '^'
- },
- {
- "underscore", '_'
- },
- {
- "low-line", '_'
- },
- {
- "grave-accent", '`'
- },
- {
- "left-brace", '{'
- },
- {
- "left-curly-bracket", '{'
- },
- {
- "vertical-line", '|'
- },
- {
- "right-brace", '}'
- },
- {
- "right-curly-bracket", '}'
- },
- {
- "tilde", '~'
- },
- {
- "DEL", '\177'
- },
- {
- NULL, 0
- }
-};
-
-/*
- * some ctype functions with non-ascii-char guard
- */
-static int
-wx_isdigit(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && isdigit((unsigned char) c));
-}
-
-static int
-wx_isalpha(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && isalpha((unsigned char) c));
-}
-
-static int
-wx_isalnum(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && isalnum((unsigned char) c));
-}
-
-static int
-wx_isupper(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && isupper((unsigned char) c));
-}
-
-static int
-wx_islower(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && islower((unsigned char) c));
-}
-
-static int
-wx_isgraph(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && isgraph((unsigned char) c));
-}
-
-static int
-wx_ispunct(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && ispunct((unsigned char) c));
-}
-
-static int
-wx_isspace(wx_wchar c)
-{
- return (c >= 0 && c <= UCHAR_MAX && isspace((unsigned char) c));
-}
-
-static wx_wchar
-wx_toupper(wx_wchar c)
-{
- if (c >= 0 && c <= UCHAR_MAX)
- return toupper((unsigned char) c);
- return c;
-}
-
-static wx_wchar
-wx_tolower(wx_wchar c)
-{
- if (c >= 0 && c <= UCHAR_MAX)
- return tolower((unsigned char) c);
- return c;
-}
-
-
-/*
- * nmcces - how many distinct MCCEs are there?
- */
-static int
-nmcces(struct vars * v)
-{
- /*
- * No multi-character collating elements defined at the moment.
- */
- return 0;
-}
-
-/*
- * nleaders - how many chrs can be first chrs of MCCEs?
- */
-static int
-nleaders(struct vars * v)
-{
- return 0;
-}
-
-/*
- * allmcces - return a cvec with all the MCCEs of the locale
- */
-static struct cvec *
-allmcces(struct vars * v, /* context */
- struct cvec * cv) /* this is supposed to have enough room */
-{
- return clearcvec(cv);
-}
-
-/*
- * element - map collating-element name to celt
- */
-static celt
-element(struct vars * v, /* context */
- chr *startp, /* points to start of name */
- chr *endp) /* points just past end of name */
-{
- struct cname *cn;
- size_t len;
-
- /* generic: one-chr names stand for themselves */
- assert(startp < endp);
- len = endp - startp;
- if (len == 1)
- return *startp;
-
- NOTE(REG_ULOCALE);
-
- /* search table */
- for (cn = cnames; cn->name != NULL; cn++)
- {
- if (strlen(cn->name) == len &&
- char_and_wchar_strncmp(cn->name, startp, len) == 0)
- {
- break; /* NOTE BREAK OUT */
- }
- }
- if (cn->name != NULL)
- return CHR(cn->code);
-
- /* couldn't find it */
- ERR(REG_ECOLLATE);
- return 0;
-}
-
-/*
- * range - supply cvec for a range, including legality check
- */
-static struct cvec *
-range(struct vars * v, /* context */
- celt a, /* range start */
- celt b, /* range end, might equal a */
- int cases) /* case-independent? */
-{
- int nchrs;
- struct cvec *cv;
- celt c,
- lc,
- uc;
-
- if (a != b && !before(a, b))
- {
- ERR(REG_ERANGE);
- return NULL;
- }
-
- if (!cases)
- { /* easy version */
- cv = getcvec(v, 0, 1, 0);
- NOERRN();
- addrange(cv, a, b);
- return cv;
- }
-
- /*
- * When case-independent, it's hard to decide when cvec ranges are
- * usable, so for now at least, we won't try. We allocate enough
- * space for two case variants plus a little extra for the two title
- * case variants.
- */
-
- nchrs = (b - a + 1) * 2 + 4;
-
- cv = getcvec(v, nchrs, 0, 0);
- NOERRN();
-
- for (c = a; c <= b; c++)
- {
- addchr(cv, c);
- lc = wx_tolower((chr) c);
- if (c != lc)
- addchr(cv, lc);
- uc = wx_toupper((chr) c);
- if (c != uc)
- addchr(cv, uc);
- }
-
- return cv;
-}
-
-/*
- * before - is celt x before celt y, for purposes of range legality?
- */
-static int /* predicate */
-before(celt x, celt y)
-{
- /* trivial because no MCCEs */
- if (x < y)
- return 1;
- return 0;
-}
-
-/*
- * eclass - supply cvec for an equivalence class
- * Must include case counterparts on request.
- */
-static struct cvec *
-eclass(struct vars * v, /* context */
- celt c, /* Collating element representing the
- * equivalence class. */
- int cases) /* all cases? */
-{
- struct cvec *cv;
-
- /* crude fake equivalence class for testing */
- if ((v->cflags & REG_FAKE) && c == 'x')
- {
- cv = getcvec(v, 4, 0, 0);
- addchr(cv, (chr) 'x');
- addchr(cv, (chr) 'y');
- if (cases)
- {
- addchr(cv, (chr) 'X');
- addchr(cv, (chr) 'Y');
- }
- return cv;
- }
-
- /* otherwise, none */
- if (cases)
- return allcases(v, c);
- cv = getcvec(v, 1, 0, 0);
- assert(cv != NULL);
- addchr(cv, (chr) c);
- return cv;
-}
-
-/*
- * cclass - supply cvec for a character class
- *
- * Must include case counterparts on request.
- */
-static struct cvec *
-cclass(struct vars * v, /* context */
- chr *startp, /* where the name starts */
- chr *endp, /* just past the end of the name */
- int cases) /* case-independent? */
-{
- size_t len;
- struct cvec *cv = NULL;
- char **namePtr;
- int i,
- index;
-
- /*
- * The following arrays define the valid character class names.
- */
-
- static char *classNames[] = {
- "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
- "lower", "print", "punct", "space", "upper", "xdigit", NULL
- };
-
- enum classes
- {
- CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
- CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
- };
-
- /*
- * Map the name to the corresponding enumerated value.
- */
- len = endp - startp;
- index = -1;
- for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
- {
- if (strlen(*namePtr) == len &&
- char_and_wchar_strncmp(*namePtr, startp, len) == 0)
- {
- index = i;
- break;
- }
- }
- if (index == -1)
- {
- ERR(REG_ECTYPE);
- return NULL;
- }
-
- /*
- * Remap lower and upper to alpha if the match is case insensitive.
- */
-
- if (cases &&
- ((enum classes) index == CC_LOWER ||
- (enum classes) index == CC_UPPER))
- index = (int) CC_ALPHA;
-
- /*
- * Now compute the character class contents.
- *
- * For the moment, assume that only char codes < 256 can be in these
- * classes.
- */
-
- switch ((enum classes) index)
- {
- case CC_PRINT:
- case CC_ALNUM:
- cv = getcvec(v, UCHAR_MAX, 1, 0);
- if (cv)
- {
- for (i = 0; i <= UCHAR_MAX; i++)
- {
- if (wx_isalpha((chr) i))
- addchr(cv, (chr) i);
- }
- addrange(cv, (chr) '0', (chr) '9');
- }
- break;
- case CC_ALPHA:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
- if (cv)
- {
- for (i = 0; i <= UCHAR_MAX; i++)
- {
- if (wx_isalpha((chr) i))
- addchr(cv, (chr) i);
- }
- }
- break;
- case CC_ASCII:
- cv = getcvec(v, 0, 1, 0);
- if (cv)
- addrange(cv, 0, 0x7f);
- break;
- case CC_BLANK:
- cv = getcvec(v, 2, 0, 0);
- addchr(cv, '\t');
- addchr(cv, ' ');
- break;
- case CC_CNTRL:
- cv = getcvec(v, 0, 2, 0);
- addrange(cv, 0x0, 0x1f);
- addrange(cv, 0x7f, 0x9f);
- break;
- case CC_DIGIT:
- cv = getcvec(v, 0, 1, 0);
- if (cv)
- addrange(cv, (chr) '0', (chr) '9');
- break;
- case CC_PUNCT:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
- if (cv)
- {
- for (i = 0; i <= UCHAR_MAX; i++)
- {
- if (wx_ispunct((chr) i))
- addchr(cv, (chr) i);
- }
- }
- break;
- case CC_XDIGIT:
- cv = getcvec(v, 0, 3, 0);
- if (cv)
- {
- addrange(cv, '0', '9');
- addrange(cv, 'a', 'f');
- addrange(cv, 'A', 'F');
- }
- break;
- case CC_SPACE:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
- if (cv)
- {
- for (i = 0; i <= UCHAR_MAX; i++)
- {
- if (wx_isspace((chr) i))
- addchr(cv, (chr) i);
- }
- }
- break;
- case CC_LOWER:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
- if (cv)
- {
- for (i = 0; i <= UCHAR_MAX; i++)
- {
- if (wx_islower((chr) i))
- addchr(cv, (chr) i);
- }
- }
- break;
- case CC_UPPER:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
- if (cv)
- {
- for (i = 0; i <= UCHAR_MAX; i++)
- {
- if (wx_isupper((chr) i))
- addchr(cv, (chr) i);
- }
- }
- break;
- case CC_GRAPH:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
- if (cv)
- {
- for (i = 0; i <= UCHAR_MAX; i++)
- {
- if (wx_isgraph((chr) i))
- addchr(cv, (chr) i);
- }
- }
- break;
- }
- if (cv == NULL)
- ERR(REG_ESPACE);
- return cv;
-}
-
-/*
- * allcases - supply cvec for all case counterparts of a chr (including itself)
- *
- * This is a shortcut, preferably an efficient one, for simple characters;
- * messy cases are done via range().
- */
-static struct cvec *
-allcases(struct vars * v, /* context */
- chr pc) /* character to get case equivs of */
-{
- struct cvec *cv;
- chr c = (chr) pc;
- chr lc,
- uc;
-
- lc = wx_tolower((chr) c);
- uc = wx_toupper((chr) c);
-
- cv = getcvec(v, 2, 0, 0);
- addchr(cv, lc);
- if (lc != uc)
- addchr(cv, uc);
- return cv;
-}
-
-/*
- * cmp - chr-substring compare
- *
- * Backrefs need this. It should preferably be efficient.
- * Note that it does not need to report anything except equal/unequal.
- * Note also that the length is exact, and the comparison should not
- * stop at embedded NULs!
- */
-static int /* 0 for equal, nonzero for unequal */
-cmp(const chr *x, const chr *y, /* strings to compare */
- size_t len) /* exact length of comparison */
-{
- return memcmp(VS(x), VS(y), len * sizeof(chr));
-}
-
-/*
- * casecmp - case-independent chr-substring compare
- *
- * REG_ICASE backrefs need this. It should preferably be efficient.
- * Note that it does not need to report anything except equal/unequal.
- * Note also that the length is exact, and the comparison should not
- * stop at embedded NULs!
- */
-static int /* 0 for equal, nonzero for unequal */
-casecmp(const chr *x, const chr *y, /* strings to compare */
- size_t len) /* exact length of comparison */
-{
- for (; len > 0; len--, x++, y++)
- {
- if ((*x != *y) && (wx_tolower(*x) != wx_tolower(*y)))
- return 1;
- }
- return 0;
-}
diff --git a/src/regex/regc_nfa.c b/src/regex/regc_nfa.c
deleted file mode 100644
index cc9f6ea2f9..0000000000
--- a/src/regex/regc_nfa.c
+++ /dev/null
@@ -1,1559 +0,0 @@
-/*
- * NFA utilities.
- * This file is #included by regcomp.c.
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header$
- *
- *
- * One or two things that technically ought to be in here
- * are actually in color.c, thanks to some incestuous relationships in
- * the color chains.
- */
-
-#define NISERR() VISERR(nfa->v)
-#define NERR(e) VERR(nfa->v, (e))
-
-
-/*
- * newnfa - set up an NFA
- */
-static struct nfa * /* the NFA, or NULL */
-newnfa(struct vars * v,
- struct colormap * cm,
- struct nfa * parent) /* NULL if primary NFA */
-{
- struct nfa *nfa;
-
- nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
- if (nfa == NULL)
- return NULL;
-
- nfa->states = NULL;
- nfa->slast = NULL;
- nfa->free = NULL;
- nfa->nstates = 0;
- nfa->cm = cm;
- nfa->v = v;
- nfa->bos[0] = nfa->bos[1] = COLORLESS;
- nfa->eos[0] = nfa->eos[1] = COLORLESS;
- nfa->post = newfstate(nfa, '@'); /* number 0 */
- nfa->pre = newfstate(nfa, '>'); /* number 1 */
- nfa->parent = parent;
-
- nfa->init = newstate(nfa); /* may become invalid later */
- nfa->final = newstate(nfa);
- if (ISERR())
- {
- freenfa(nfa);
- return NULL;
- }
- rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
- newarc(nfa, '^', 1, nfa->pre, nfa->init);
- newarc(nfa, '^', 0, nfa->pre, nfa->init);
- rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
- newarc(nfa, '$', 1, nfa->final, nfa->post);
- newarc(nfa, '$', 0, nfa->final, nfa->post);
-
- if (ISERR())
- {
- freenfa(nfa);
- return NULL;
- }
- return nfa;
-}
-
-/*
- * freenfa - free an entire NFA
- */
-static void
-freenfa(struct nfa * nfa)
-{
- struct state *s;
-
- while ((s = nfa->states) != NULL)
- {
- s->nins = s->nouts = 0; /* don't worry about arcs */
- freestate(nfa, s);
- }
- while ((s = nfa->free) != NULL)
- {
- nfa->free = s->next;
- destroystate(nfa, s);
- }
-
- nfa->slast = NULL;
- nfa->nstates = -1;
- nfa->pre = NULL;
- nfa->post = NULL;
- FREE(nfa);
-}
-
-/*
- * newstate - allocate an NFA state, with zero flag value
- */
-static struct state * /* NULL on error */
-newstate(struct nfa * nfa)
-{
- struct state *s;
-
- if (nfa->free != NULL)
- {
- s = nfa->free;
- nfa->free = s->next;
- }
- else
- {
- s = (struct state *) MALLOC(sizeof(struct state));
- if (s == NULL)
- {
- NERR(REG_ESPACE);
- return NULL;
- }
- s->oas.next = NULL;
- s->free = NULL;
- s->noas = 0;
- }
-
- assert(nfa->nstates >= 0);
- s->no = nfa->nstates++;
- s->flag = 0;
- if (nfa->states == NULL)
- nfa->states = s;
- s->nins = 0;
- s->ins = NULL;
- s->nouts = 0;
- s->outs = NULL;
- s->tmp = NULL;
- s->next = NULL;
- if (nfa->slast != NULL)
- {
- assert(nfa->slast->next == NULL);
- nfa->slast->next = s;
- }
- s->prev = nfa->slast;
- nfa->slast = s;
- return s;
-}
-
-/*
- * newfstate - allocate an NFA state with a specified flag value
- */
-static struct state * /* NULL on error */
-newfstate(struct nfa * nfa, int flag)
-{
- struct state *s;
-
- s = newstate(nfa);
- if (s != NULL)
- s->flag = (char) flag;
- return s;
-}
-
-/*
- * dropstate - delete a state's inarcs and outarcs and free it
- */
-static void
-dropstate(struct nfa * nfa,
- struct state * s)
-{
- struct arc *a;
-
- while ((a = s->ins) != NULL)
- freearc(nfa, a);
- while ((a = s->outs) != NULL)
- freearc(nfa, a);
- freestate(nfa, s);
-}
-
-/*
- * freestate - free a state, which has no in-arcs or out-arcs
- */
-static void
-freestate(struct nfa * nfa,
- struct state * s)
-{
- assert(s != NULL);
- assert(s->nins == 0 && s->nouts == 0);
-
- s->no = FREESTATE;
- s->flag = 0;
- if (s->next != NULL)
- s->next->prev = s->prev;
- else
- {
- assert(s == nfa->slast);
- nfa->slast = s->prev;
- }
- if (s->prev != NULL)
- s->prev->next = s->next;
- else
- {
- assert(s == nfa->states);
- nfa->states = s->next;
- }
- s->prev = NULL;
- s->next = nfa->free; /* don't delete it, put it on the free
- * list */
- nfa->free = s;
-}
-
-/*
- * destroystate - really get rid of an already-freed state
- */
-static void
-destroystate(struct nfa * nfa,
- struct state * s)
-{
- struct arcbatch *ab;
- struct arcbatch *abnext;
-
- assert(s->no == FREESTATE);
- for (ab = s->oas.next; ab != NULL; ab = abnext)
- {
- abnext = ab->next;
- FREE(ab);
- }
- s->ins = NULL;
- s->outs = NULL;
- s->next = NULL;
- FREE(s);
-}
-
-/*
- * newarc - set up a new arc within an NFA
- */
-static void
-newarc(struct nfa * nfa,
- int t,
- pcolor co,
- struct state * from,
- struct state * to)
-{
- struct arc *a;
-
- assert(from != NULL && to != NULL);
-
- /* check for duplicates */
- for (a = from->outs; a != NULL; a = a->outchain)
- if (a->to == to && a->co == co && a->type == t)
- return;
-
- a = allocarc(nfa, from);
- if (NISERR())
- return;
- assert(a != NULL);
-
- a->type = t;
- a->co = (color) co;
- a->to = to;
- a->from = from;
-
- /*
- * Put the new arc on the beginning, not the end, of the chains. Not
- * only is this easier, it has the very useful side effect that
- * deleting the most-recently-added arc is the cheapest case rather
- * than the most expensive one.
- */
- a->inchain = to->ins;
- to->ins = a;
- a->outchain = from->outs;
- from->outs = a;
-
- from->nouts++;
- to->nins++;
-
- if (COLORED(a) && nfa->parent == NULL)
- colorchain(nfa->cm, a);
-
- return;
-}
-
-/*
- * allocarc - allocate a new out-arc within a state
- */
-static struct arc * /* NULL for failure */
-allocarc(struct nfa * nfa,
- struct state * s)
-{
- struct arc *a;
- struct arcbatch *new;
- int i;
-
- /* shortcut */
- if (s->free == NULL && s->noas < ABSIZE)
- {
- a = &s->oas.a[s->noas];
- s->noas++;
- return a;
- }
-
- /* if none at hand, get more */
- if (s->free == NULL)
- {
- new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
- if (new == NULL)
- {
- NERR(REG_ESPACE);
- return NULL;
- }
- new->next = s->oas.next;
- s->oas.next = new;
-
- for (i = 0; i < ABSIZE; i++)
- {
- new->a[i].type = 0;
- new->a[i].freechain = &new->a[i + 1];
- }
- new->a[ABSIZE - 1].freechain = NULL;
- s->free = &new->a[0];
- }
- assert(s->free != NULL);
-
- a = s->free;
- s->free = a->freechain;
- return a;
-}
-
-/*
- * freearc - free an arc
- */
-static void
-freearc(struct nfa * nfa,
- struct arc * victim)
-{
- struct state *from = victim->from;
- struct state *to = victim->to;
- struct arc *a;
-
- assert(victim->type != 0);
-
- /* take it off color chain if necessary */
- if (COLORED(victim) && nfa->parent == NULL)
- uncolorchain(nfa->cm, victim);
-
- /* take it off source's out-chain */
- assert(from != NULL);
- assert(from->outs != NULL);
- a = from->outs;
- if (a == victim) /* simple case: first in chain */
- from->outs = victim->outchain;
- else
- {
- for (; a != NULL && a->outchain != victim; a = a->outchain)
- continue;
- assert(a != NULL);
- a->outchain = victim->outchain;
- }
- from->nouts--;
-
- /* take it off target's in-chain */
- assert(to != NULL);
- assert(to->ins != NULL);
- a = to->ins;
- if (a == victim) /* simple case: first in chain */
- to->ins = victim->inchain;
- else
- {
- for (; a != NULL && a->inchain != victim; a = a->inchain)
- continue;
- assert(a != NULL);
- a->inchain = victim->inchain;
- }
- to->nins--;
-
- /* clean up and place on free list */
- victim->type = 0;
- victim->from = NULL; /* precautions... */
- victim->to = NULL;
- victim->inchain = NULL;
- victim->outchain = NULL;
- victim->freechain = from->free;
- from->free = victim;
-}
-
-/*
- * findarc - find arc, if any, from given source with given type and color
- * If there is more than one such arc, the result is random.
- */
-static struct arc *
-findarc(struct state * s,
- int type,
- pcolor co)
-{
- struct arc *a;
-
- for (a = s->outs; a != NULL; a = a->outchain)
- if (a->type == type && a->co == co)
- return a;
- return NULL;
-}
-
-/*
- * cparc - allocate a new arc within an NFA, copying details from old one
- */
-static void
-cparc(struct nfa * nfa,
- struct arc * oa,
- struct state * from,
- struct state * to)
-{
- newarc(nfa, oa->type, oa->co, from, to);
-}
-
-/*
- * moveins - move all in arcs of a state to another state
- *
- * You might think this could be done better by just updating the
- * existing arcs, and you would be right if it weren't for the desire
- * for duplicate suppression, which makes it easier to just make new
- * ones to exploit the suppression built into newarc.
- */
-static void
-moveins(struct nfa * nfa,
- struct state * old,
- struct state * new)
-{
- struct arc *a;
-
- assert(old != new);
-
- while ((a = old->ins) != NULL)
- {
- cparc(nfa, a, a->from, new);
- freearc(nfa, a);
- }
- assert(old->nins == 0);
- assert(old->ins == NULL);
-}
-
-/*
- * copyins - copy all in arcs of a state to another state
- */
-static void
-copyins(struct nfa * nfa,
- struct state * old,
- struct state * new)
-{
- struct arc *a;
-
- assert(old != new);
-
- for (a = old->ins; a != NULL; a = a->inchain)
- cparc(nfa, a, a->from, new);
-}
-
-/*
- * moveouts - move all out arcs of a state to another state
- */
-static void
-moveouts(struct nfa * nfa,
- struct state * old,
- struct state * new)
-{
- struct arc *a;
-
- assert(old != new);
-
- while ((a = old->outs) != NULL)
- {
- cparc(nfa, a, new, a->to);
- freearc(nfa, a);
- }
-}
-
-/*
- * copyouts - copy all out arcs of a state to another state
- */
-static void
-copyouts(struct nfa * nfa,
- struct state * old,
- struct state * new)
-{
- struct arc *a;
-
- assert(old != new);
-
- for (a = old->outs; a != NULL; a = a->outchain)
- cparc(nfa, a, new, a->to);
-}
-
-/*
- * cloneouts - copy out arcs of a state to another state pair, modifying type
- */
-static void
-cloneouts(struct nfa * nfa,
- struct state * old,
- struct state * from,
- struct state * to,
- int type)
-{
- struct arc *a;
-
- assert(old != from);
-
- for (a = old->outs; a != NULL; a = a->outchain)
- newarc(nfa, type, a->co, from, to);
-}
-
-/*
- * delsub - delete a sub-NFA, updating subre pointers if necessary
- *
- * This uses a recursive traversal of the sub-NFA, marking already-seen
- * states using their tmp pointer.
- */
-static void
-delsub(struct nfa * nfa,
- struct state * lp, /* the sub-NFA goes from here... */
- struct state * rp) /* ...to here, *not* inclusive */
-{
- assert(lp != rp);
-
- rp->tmp = rp; /* mark end */
-
- deltraverse(nfa, lp, lp);
- assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
- assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
-
- rp->tmp = NULL; /* unmark end */
- lp->tmp = NULL; /* and begin, marked by deltraverse */
-}
-
-/*
- * deltraverse - the recursive heart of delsub
- * This routine's basic job is to destroy all out-arcs of the state.
- */
-static void
-deltraverse(struct nfa * nfa,
- struct state * leftend,
- struct state * s)
-{
- struct arc *a;
- struct state *to;
-
- if (s->nouts == 0)
- return; /* nothing to do */
- if (s->tmp != NULL)
- return; /* already in progress */
-
- s->tmp = s; /* mark as in progress */
-
- while ((a = s->outs) != NULL)
- {
- to = a->to;
- deltraverse(nfa, leftend, to);
- assert(to->nouts == 0 || to->tmp != NULL);
- freearc(nfa, a);
- if (to->nins == 0 && to->tmp == NULL)
- {
- assert(to->nouts == 0);
- freestate(nfa, to);
- }
- }
-
- assert(s->no != FREESTATE); /* we're still here */
- assert(s == leftend || s->nins != 0); /* and still reachable */
- assert(s->nouts == 0); /* but have no outarcs */
-
- s->tmp = NULL; /* we're done here */
-}
-
-/*
- * dupnfa - duplicate sub-NFA
- *
- * Another recursive traversal, this time using tmp to point to duplicates
- * as well as mark already-seen states. (You knew there was a reason why
- * it's a state pointer, didn't you? :-))
- */
-static void
-dupnfa(struct nfa * nfa,
- struct state * start, /* duplicate of subNFA starting here */
- struct state * stop, /* and stopping here */
- struct state * from, /* stringing duplicate from here */
- struct state * to) /* to here */
-{
- if (start == stop)
- {
- newarc(nfa, EMPTY, 0, from, to);
- return;
- }
-
- stop->tmp = to;
- duptraverse(nfa, start, from);
- /* done, except for clearing out the tmp pointers */
-
- stop->tmp = NULL;
- cleartraverse(nfa, start);
-}
-
-/*
- * duptraverse - recursive heart of dupnfa
- */
-static void
-duptraverse(struct nfa * nfa,
- struct state * s,
- struct state * stmp) /* s's duplicate, or NULL */
-{
- struct arc *a;
-
- if (s->tmp != NULL)
- return; /* already done */
-
- s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
- if (s->tmp == NULL)
- {
- assert(NISERR());
- return;
- }
-
- for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
- {
- duptraverse(nfa, a->to, (struct state *) NULL);
- assert(a->to->tmp != NULL);
- cparc(nfa, a, s->tmp, a->to->tmp);
- }
-}
-
-/*
- * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
- */
-static void
-cleartraverse(struct nfa * nfa,
- struct state * s)
-{
- struct arc *a;
-
- if (s->tmp == NULL)
- return;
- s->tmp = NULL;
-
- for (a = s->outs; a != NULL; a = a->outchain)
- cleartraverse(nfa, a->to);
-}
-
-/*
- * specialcolors - fill in special colors for an NFA
- */
-static void
-specialcolors(struct nfa * nfa)
-{
- /* false colors for BOS, BOL, EOS, EOL */
- if (nfa->parent == NULL)
- {
- nfa->bos[0] = pseudocolor(nfa->cm);
- nfa->bos[1] = pseudocolor(nfa->cm);
- nfa->eos[0] = pseudocolor(nfa->cm);
- nfa->eos[1] = pseudocolor(nfa->cm);
- }
- else
- {
- assert(nfa->parent->bos[0] != COLORLESS);
- nfa->bos[0] = nfa->parent->bos[0];
- assert(nfa->parent->bos[1] != COLORLESS);
- nfa->bos[1] = nfa->parent->bos[1];
- assert(nfa->parent->eos[0] != COLORLESS);
- nfa->eos[0] = nfa->parent->eos[0];
- assert(nfa->parent->eos[1] != COLORLESS);
- nfa->eos[1] = nfa->parent->eos[1];
- }
-}
-
-/*
- * optimize - optimize an NFA
- */
-static long /* re_info bits */
-optimize(struct nfa * nfa,
- FILE *f) /* for debug output; NULL none */
-{
-#ifdef REG_DEBUG
- int verbose = (f != NULL) ? 1 : 0;
-
- if (verbose)
- fprintf(f, "\ninitial cleanup:\n");
-#endif
- cleanup(nfa); /* may simplify situation */
-#ifdef REG_DEBUG
- if (verbose)
- dumpnfa(nfa, f);
- if (verbose)
- fprintf(f, "\nempties:\n");
-#endif
- fixempties(nfa, f); /* get rid of EMPTY arcs */
-#ifdef REG_DEBUG
- if (verbose)
- fprintf(f, "\nconstraints:\n");
-#endif
- pullback(nfa, f); /* pull back constraints backward */
- pushfwd(nfa, f); /* push fwd constraints forward */
-#ifdef REG_DEBUG
- if (verbose)
- fprintf(f, "\nfinal cleanup:\n");
-#endif
- cleanup(nfa); /* final tidying */
- return analyze(nfa); /* and analysis */
-}
-
-/*
- * pullback - pull back constraints backward to (with luck) eliminate them
- */
-static void
-pullback(struct nfa * nfa,
- FILE *f) /* for debug output; NULL none */
-{
- struct state *s;
- struct state *nexts;
- struct arc *a;
- struct arc *nexta;
- int progress;
-
- /* find and pull until there are no more */
- do
- {
- progress = 0;
- for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
- {
- nexts = s->next;
- for (a = s->outs; a != NULL && !NISERR(); a = nexta)
- {
- nexta = a->outchain;
- if (a->type == '^' || a->type == BEHIND)
- if (pull(nfa, a))
- progress = 1;
- assert(nexta == NULL || s->no != FREESTATE);
- }
- }
- if (progress && f != NULL)
- dumpnfa(nfa, f);
- } while (progress && !NISERR());
- if (NISERR())
- return;
-
- for (a = nfa->pre->outs; a != NULL; a = nexta)
- {
- nexta = a->outchain;
- if (a->type == '^')
- {
- assert(a->co == 0 || a->co == 1);
- newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
- freearc(nfa, a);
- }
- }
-}
-
-/*
- * pull - pull a back constraint backward past its source state
- * A significant property of this function is that it deletes at most
- * one state -- the constraint's from state -- and only if the constraint
- * was that state's last outarc.
- */
-static int /* 0 couldn't, 1 could */
-pull(struct nfa * nfa,
- struct arc * con)
-{
- struct state *from = con->from;
- struct state *to = con->to;
- struct arc *a;
- struct arc *nexta;
- struct state *s;
-
- if (from == to)
- { /* circular constraint is pointless */
- freearc(nfa, con);
- return 1;
- }
- if (from->flag) /* can't pull back beyond start */
- return 0;
- if (from->nins == 0)
- { /* unreachable */
- freearc(nfa, con);
- return 1;
- }
-
- /* first, clone from state if necessary to avoid other outarcs */
- if (from->nouts > 1)
- {
- s = newstate(nfa);
- if (NISERR())
- return 0;
- assert(to != from); /* con is not an inarc */
- copyins(nfa, from, s); /* duplicate inarcs */
- cparc(nfa, con, s, to); /* move constraint arc */
- freearc(nfa, con);
- from = s;
- con = from->outs;
- }
- assert(from->nouts == 1);
-
- /* propagate the constraint into the from state's inarcs */
- for (a = from->ins; a != NULL; a = nexta)
- {
- nexta = a->inchain;
- switch (combine(con, a))
- {
- case INCOMPATIBLE: /* destroy the arc */
- freearc(nfa, a);
- break;
- case SATISFIED: /* no action needed */
- break;
- case COMPATIBLE: /* swap the two arcs, more or less */
- s = newstate(nfa);
- if (NISERR())
- return 0;
- cparc(nfa, a, s, to); /* anticipate move */
- cparc(nfa, con, a->from, s);
- if (NISERR())
- return 0;
- freearc(nfa, a);
- break;
- default:
- assert(NOTREACHED);
- break;
- }
- }
-
- /* remaining inarcs, if any, incorporate the constraint */
- moveins(nfa, from, to);
- dropstate(nfa, from); /* will free the constraint */
- return 1;
-}
-
-/*
- * pushfwd - push forward constraints forward to (with luck) eliminate them
- */
-static void
-pushfwd(struct nfa * nfa,
- FILE *f) /* for debug output; NULL none */
-{
- struct state *s;
- struct state *nexts;
- struct arc *a;
- struct arc *nexta;
- int progress;
-
- /* find and push until there are no more */
- do
- {
- progress = 0;
- for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
- {
- nexts = s->next;
- for (a = s->ins; a != NULL && !NISERR(); a = nexta)
- {
- nexta = a->inchain;
- if (a->type == '$' || a->type == AHEAD)
- if (push(nfa, a))
- progress = 1;
- assert(nexta == NULL || s->no != FREESTATE);
- }
- }
- if (progress && f != NULL)
- dumpnfa(nfa, f);
- } while (progress && !NISERR());
- if (NISERR())
- return;
-
- for (a = nfa->post->ins; a != NULL; a = nexta)
- {
- nexta = a->inchain;
- if (a->type == '$')
- {
- assert(a->co == 0 || a->co == 1);
- newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
- freearc(nfa, a);
- }
- }
-}
-
-/*
- * push - push a forward constraint forward past its destination state
- * A significant property of this function is that it deletes at most
- * one state -- the constraint's to state -- and only if the constraint
- * was that state's last inarc.
- */
-static int /* 0 couldn't, 1 could */
-push(struct nfa * nfa,
- struct arc * con)
-{
- struct state *from = con->from;
- struct state *to = con->to;
- struct arc *a;
- struct arc *nexta;
- struct state *s;
-
- if (to == from)
- { /* circular constraint is pointless */
- freearc(nfa, con);
- return 1;
- }
- if (to->flag) /* can't push forward beyond end */
- return 0;
- if (to->nouts == 0)
- { /* dead end */
- freearc(nfa, con);
- return 1;
- }
-
- /* first, clone to state if necessary to avoid other inarcs */
- if (to->nins > 1)
- {
- s = newstate(nfa);
- if (NISERR())
- return 0;
- copyouts(nfa, to, s); /* duplicate outarcs */
- cparc(nfa, con, from, s); /* move constraint */
- freearc(nfa, con);
- to = s;
- con = to->ins;
- }
- assert(to->nins == 1);
-
- /* propagate the constraint into the to state's outarcs */
- for (a = to->outs; a != NULL; a = nexta)
- {
- nexta = a->outchain;
- switch (combine(con, a))
- {
- case INCOMPATIBLE: /* destroy the arc */
- freearc(nfa, a);
- break;
- case SATISFIED: /* no action needed */
- break;
- case COMPATIBLE: /* swap the two arcs, more or less */
- s = newstate(nfa);
- if (NISERR())
- return 0;
- cparc(nfa, con, s, a->to); /* anticipate move */
- cparc(nfa, a, from, s);
- if (NISERR())
- return 0;
- freearc(nfa, a);
- break;
- default:
- assert(NOTREACHED);
- break;
- }
- }
-
- /* remaining outarcs, if any, incorporate the constraint */
- moveouts(nfa, to, from);
- dropstate(nfa, to); /* will free the constraint */
- return 1;
-}
-
-/*
- * combine - constraint lands on an arc, what happens?
- *
- * #def INCOMPATIBLE 1 // destroys arc
- * #def SATISFIED 2 // constraint satisfied
- * #def COMPATIBLE 3 // compatible but not satisfied yet
- */
-static int
-combine(struct arc * con,
- struct arc * a)
-{
-#define CA(ct,at) (((ct)<type, a->type))
- {
- case CA('^', PLAIN): /* newlines are handled separately */
- case CA('$', PLAIN):
- return INCOMPATIBLE;
- break;
- case CA(AHEAD, PLAIN): /* color constraints meet colors */
- case CA(BEHIND, PLAIN):
- if (con->co == a->co)
- return SATISFIED;
- return INCOMPATIBLE;
- break;
- case CA('^', '^'): /* collision, similar constraints */
- case CA('$', '$'):
- case CA(AHEAD, AHEAD):
- case CA(BEHIND, BEHIND):
- if (con->co == a->co) /* true duplication */
- return SATISFIED;
- return INCOMPATIBLE;
- break;
- case CA('^', BEHIND): /* collision, dissimilar constraints */
- case CA(BEHIND, '^'):
- case CA('$', AHEAD):
- case CA(AHEAD, '$'):
- return INCOMPATIBLE;
- break;
- case CA('^', '$'): /* constraints passing each other */
- case CA('^', AHEAD):
- case CA(BEHIND, '$'):
- case CA(BEHIND, AHEAD):
- case CA('$', '^'):
- case CA('$', BEHIND):
- case CA(AHEAD, '^'):
- case CA(AHEAD, BEHIND):
- case CA('^', LACON):
- case CA(BEHIND, LACON):
- case CA('$', LACON):
- case CA(AHEAD, LACON):
- return COMPATIBLE;
- break;
- }
- assert(NOTREACHED);
- return INCOMPATIBLE; /* for benefit of blind compilers */
-}
-
-/*
- * fixempties - get rid of EMPTY arcs
- */
-static void
-fixempties(struct nfa * nfa,
- FILE *f) /* for debug output; NULL none */
-{
- struct state *s;
- struct state *nexts;
- struct arc *a;
- struct arc *nexta;
- int progress;
-
- /* find and eliminate empties until there are no more */
- do
- {
- progress = 0;
- for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
- {
- nexts = s->next;
- for (a = s->outs; a != NULL && !NISERR(); a = nexta)
- {
- nexta = a->outchain;
- if (a->type == EMPTY && unempty(nfa, a))
- progress = 1;
- assert(nexta == NULL || s->no != FREESTATE);
- }
- }
- if (progress && f != NULL)
- dumpnfa(nfa, f);
- } while (progress && !NISERR());
-}
-
-/*
- * unempty - optimize out an EMPTY arc, if possible
- *
- * Actually, as it stands this function always succeeds, but the return
- * value is kept with an eye on possible future changes.
- */
-static int /* 0 couldn't, 1 could */
-unempty(struct nfa * nfa,
- struct arc * a)
-{
- struct state *from = a->from;
- struct state *to = a->to;
- int usefrom; /* work on from, as opposed to to? */
-
- assert(a->type == EMPTY);
- assert(from != nfa->pre && to != nfa->post);
-
- if (from == to)
- { /* vacuous loop */
- freearc(nfa, a);
- return 1;
- }
-
- /* decide which end to work on */
- usefrom = 1; /* default: attack from */
- if (from->nouts > to->nins)
- usefrom = 0;
- else if (from->nouts == to->nins)
- {
- /* decide on secondary issue: move/copy fewest arcs */
- if (from->nins > to->nouts)
- usefrom = 0;
- }
-
- freearc(nfa, a);
- if (usefrom)
- {
- if (from->nouts == 0)
- {
- /* was the state's only outarc */
- moveins(nfa, from, to);
- freestate(nfa, from);
- }
- else
- copyins(nfa, from, to);
- }
- else
- {
- if (to->nins == 0)
- {
- /* was the state's only inarc */
- moveouts(nfa, to, from);
- freestate(nfa, to);
- }
- else
- copyouts(nfa, to, from);
- }
-
- return 1;
-}
-
-/*
- * cleanup - clean up NFA after optimizations
- */
-static void
-cleanup(struct nfa * nfa)
-{
- struct state *s;
- struct state *nexts;
- int n;
-
- /* clear out unreachable or dead-end states */
- /* use pre to mark reachable, then post to mark can-reach-post */
- markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
- markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
- for (s = nfa->states; s != NULL; s = nexts)
- {
- nexts = s->next;
- if (s->tmp != nfa->post && !s->flag)
- dropstate(nfa, s);
- }
- assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
- cleartraverse(nfa, nfa->pre);
- assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
- /* the nins==0 (final unreachable) case will be caught later */
-
- /* renumber surviving states */
- n = 0;
- for (s = nfa->states; s != NULL; s = s->next)
- s->no = n++;
- nfa->nstates = n;
-}
-
-/*
- * markreachable - recursive marking of reachable states
- */
-static void
-markreachable(struct nfa * nfa,
- struct state * s,
- struct state * okay, /* consider only states with this
- * mark */
- struct state * mark) /* the value to mark with */
-{
- struct arc *a;
-
- if (s->tmp != okay)
- return;
- s->tmp = mark;
-
- for (a = s->outs; a != NULL; a = a->outchain)
- markreachable(nfa, a->to, okay, mark);
-}
-
-/*
- * markcanreach - recursive marking of states which can reach here
- */
-static void
-markcanreach(struct nfa * nfa,
- struct state * s,
- struct state * okay, /* consider only states with this
- * mark */
- struct state * mark) /* the value to mark with */
-{
- struct arc *a;
-
- if (s->tmp != okay)
- return;
- s->tmp = mark;
-
- for (a = s->ins; a != NULL; a = a->inchain)
- markcanreach(nfa, a->from, okay, mark);
-}
-
-/*
- * analyze - ascertain potentially-useful facts about an optimized NFA
- */
-static long /* re_info bits to be ORed in */
-analyze(struct nfa * nfa)
-{
- struct arc *a;
- struct arc *aa;
-
- if (nfa->pre->outs == NULL)
- return REG_UIMPOSSIBLE;
- for (a = nfa->pre->outs; a != NULL; a = a->outchain)
- for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
- if (aa->to == nfa->post)
- return REG_UEMPTYMATCH;
- return 0;
-}
-
-/*
- * compact - compact an NFA
- */
-static void
-compact(struct nfa * nfa,
- struct cnfa * cnfa)
-{
- struct state *s;
- struct arc *a;
- size_t nstates;
- size_t narcs;
- struct carc *ca;
- struct carc *first;
-
- assert(!NISERR());
-
- nstates = 0;
- narcs = 0;
- for (s = nfa->states; s != NULL; s = s->next)
- {
- nstates++;
- narcs += 1 + s->nouts + 1;
- /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
- }
-
- cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
- cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
- if (cnfa->states == NULL || cnfa->arcs == NULL)
- {
- if (cnfa->states != NULL)
- FREE(cnfa->states);
- if (cnfa->arcs != NULL)
- FREE(cnfa->arcs);
- NERR(REG_ESPACE);
- return;
- }
- cnfa->nstates = nstates;
- cnfa->pre = nfa->pre->no;
- cnfa->post = nfa->post->no;
- cnfa->bos[0] = nfa->bos[0];
- cnfa->bos[1] = nfa->bos[1];
- cnfa->eos[0] = nfa->eos[0];
- cnfa->eos[1] = nfa->eos[1];
- cnfa->ncolors = maxcolor(nfa->cm) + 1;
- cnfa->flags = 0;
-
- ca = cnfa->arcs;
- for (s = nfa->states; s != NULL; s = s->next)
- {
- assert((size_t) s->no < nstates);
- cnfa->states[s->no] = ca;
- ca->co = 0; /* clear and skip flags "arc" */
- ca++;
- first = ca;
- for (a = s->outs; a != NULL; a = a->outchain)
- switch (a->type)
- {
- case PLAIN:
- ca->co = a->co;
- ca->to = a->to->no;
- ca++;
- break;
- case LACON:
- assert(s->no != cnfa->pre);
- ca->co = (color) (cnfa->ncolors + a->co);
- ca->to = a->to->no;
- ca++;
- cnfa->flags |= HASLACONS;
- break;
- default:
- assert(NOTREACHED);
- break;
- }
- carcsort(first, ca - 1);
- ca->co = COLORLESS;
- ca->to = 0;
- ca++;
- }
- assert(ca == &cnfa->arcs[narcs]);
- assert(cnfa->nstates != 0);
-
- /* mark no-progress states */
- for (a = nfa->pre->outs; a != NULL; a = a->outchain)
- cnfa->states[a->to->no]->co = 1;
- cnfa->states[nfa->pre->no]->co = 1;
-}
-
-/*
- * carcsort - sort compacted-NFA arcs by color
- *
- * Really dumb algorithm, but if the list is long enough for that to matter,
- * you're in real trouble anyway.
- */
-static void
-carcsort(struct carc * first,
- struct carc * last)
-{
- struct carc *p;
- struct carc *q;
- struct carc tmp;
-
- if (last - first <= 1)
- return;
-
- for (p = first; p <= last; p++)
- for (q = p; q <= last; q++)
- if (p->co > q->co ||
- (p->co == q->co && p->to > q->to))
- {
- assert(p != q);
- tmp = *p;
- *p = *q;
- *q = tmp;
- }
-}
-
-/*
- * freecnfa - free a compacted NFA
- */
-static void
-freecnfa(struct cnfa * cnfa)
-{
- assert(cnfa->nstates != 0); /* not empty already */
- cnfa->nstates = 0;
- FREE(cnfa->states);
- FREE(cnfa->arcs);
-}
-
-/*
- * dumpnfa - dump an NFA in human-readable form
- */
-static void
-dumpnfa(struct nfa * nfa,
- FILE *f)
-{
-#ifdef REG_DEBUG
- struct state *s;
-
- fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
- if (nfa->bos[0] != COLORLESS)
- fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
- if (nfa->bos[1] != COLORLESS)
- fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
- if (nfa->eos[0] != COLORLESS)
- fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
- if (nfa->eos[1] != COLORLESS)
- fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
- fprintf(f, "\n");
- for (s = nfa->states; s != NULL; s = s->next)
- dumpstate(s, f);
- if (nfa->parent == NULL)
- dumpcolors(nfa->cm, f);
- fflush(f);
-#endif
-}
-
-#ifdef REG_DEBUG /* subordinates of dumpnfa */
-
-/*
- * dumpstate - dump an NFA state in human-readable form
- */
-static void
-dumpstate(struct state * s,
- FILE *f)
-{
- struct arc *a;
-
- fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
- (s->flag) ? s->flag : '.');
- if (s->prev != NULL && s->prev->next != s)
- fprintf(f, "\tstate chain bad\n");
- if (s->nouts == 0)
- fprintf(f, "\tno out arcs\n");
- else
- dumparcs(s, f);
- fflush(f);
- for (a = s->ins; a != NULL; a = a->inchain)
- {
- if (a->to != s)
- fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
- a->from->no, a->to->no, s->no);
- }
-}
-
-/*
- * dumparcs - dump out-arcs in human-readable form
- */
-static void
-dumparcs(struct state * s,
- FILE *f)
-{
- int pos;
-
- assert(s->nouts > 0);
- /* printing arcs in reverse order is usually clearer */
- pos = dumprarcs(s->outs, s, f, 1);
- if (pos != 1)
- fprintf(f, "\n");
-}
-
-/*
- * dumprarcs - dump remaining outarcs, recursively, in reverse order
- */
-static int /* resulting print position */
-dumprarcs(struct arc * a,
- struct state * s,
- FILE *f,
- int pos) /* initial print position */
-{
- if (a->outchain != NULL)
- pos = dumprarcs(a->outchain, s, f, pos);
- dumparc(a, s, f);
- if (pos == 5)
- {
- fprintf(f, "\n");
- pos = 1;
- }
- else
- pos++;
- return pos;
-}
-
-/*
- * dumparc - dump one outarc in readable form, including prefixing tab
- */
-static void
-dumparc(struct arc * a,
- struct state * s,
- FILE *f)
-{
- struct arc *aa;
- struct arcbatch *ab;
-
- fprintf(f, "\t");
- switch (a->type)
- {
- case PLAIN:
- fprintf(f, "[%ld]", (long) a->co);
- break;
- case AHEAD:
- fprintf(f, ">%ld>", (long) a->co);
- break;
- case BEHIND:
- fprintf(f, "<%ld<", (long) a->co);
- break;
- case LACON:
- fprintf(f, ":%ld:", (long) a->co);
- break;
- case '^':
- case '$':
- fprintf(f, "%c%d", a->type, (int) a->co);
- break;
- case EMPTY:
- break;
- default:
- fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
- break;
- }
- if (a->from != s)
- fprintf(f, "?%d?", a->from->no);
- for (ab = &a->from->oas; ab != NULL; ab = ab->next)
- {
- for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
- if (aa == a)
- break; /* NOTE BREAK OUT */
- if (aa < &ab->a[ABSIZE]) /* propagate break */
- break; /* NOTE BREAK OUT */
- }
- if (ab == NULL)
- fprintf(f, "?!?"); /* not in allocated space */
- fprintf(f, "->");
- if (a->to == NULL)
- {
- fprintf(f, "NULL");
- return;
- }
- fprintf(f, "%d", a->to->no);
- for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
- if (aa == a)
- break; /* NOTE BREAK OUT */
- if (aa == NULL)
- fprintf(f, "?!?"); /* missing from in-chain */
-}
-#endif /* REG_DEBUG */
-
-/*
- * dumpcnfa - dump a compacted NFA in human-readable form
- */
-#ifdef REG_DEBUG
-static void
-dumpcnfa(struct cnfa * cnfa,
- FILE *f)
-{
- int st;
-
- fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
- if (cnfa->bos[0] != COLORLESS)
- fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
- if (cnfa->bos[1] != COLORLESS)
- fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
- if (cnfa->eos[0] != COLORLESS)
- fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
- if (cnfa->eos[1] != COLORLESS)
- fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
- if (cnfa->flags & HASLACONS)
- fprintf(f, ", haslacons");
- fprintf(f, "\n");
- for (st = 0; st < cnfa->nstates; st++)
- dumpcstate(st, cnfa->states[st], cnfa, f);
- fflush(f);
-}
-#endif
-
-#ifdef REG_DEBUG /* subordinates of dumpcnfa */
-
-/*
- * dumpcstate - dump a compacted-NFA state in human-readable form
- */
-static void
-dumpcstate(int st,
- struct carc * ca,
- struct cnfa * cnfa,
- FILE *f)
-{
- int i;
- int pos;
-
- fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
- pos = 1;
- for (i = 1; ca[i].co != COLORLESS; i++)
- {
- if (ca[i].co < cnfa->ncolors)
- fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
- else
- fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
- ca[i].to);
- if (pos == 5)
- {
- fprintf(f, "\n");
- pos = 1;
- }
- else
- pos++;
- }
- if (i == 1 || pos != 1)
- fprintf(f, "\n");
- fflush(f);
-}
-
-#endif /* REG_DEBUG */
diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c
index a84b939944..a016af0b77 100644
--- a/src/regex/regcomp.c
+++ b/src/regex/regcomp.c
@@ -1,2261 +1,1610 @@
-/*
- * re_*comp and friends - compile REs
- * This file #includes several others (see the bottom).
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regcomp.c,v 1.38 2003/08/08 21:41:56 momjian Exp $
- *
- */
-
-#include "regguts.h"
-
-/*
- * forward declarations, up here so forward datatypes etc. are defined early
- */
-/* === regcomp.c === */
-static void moresubs(struct vars *, int);
-static int freev(struct vars *, int);
-static void makesearch(struct vars *, struct nfa *);
-static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
-static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
-static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
-static void nonword(struct vars *, int, struct state *, struct state *);
-static void word(struct vars *, int, struct state *, struct state *);
-static int scannum(struct vars *);
-static void repeat(struct vars *, struct state *, struct state *, int, int);
-static void bracket(struct vars *, struct state *, struct state *);
-static void cbracket(struct vars *, struct state *, struct state *);
-static void brackpart(struct vars *, struct state *, struct state *);
-static chr *scanplain(struct vars *);
-static void leaders(struct vars *, struct cvec *);
-static void onechr(struct vars *, chr, struct state *, struct state *);
-static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
-static celt nextleader(struct vars *, chr, chr);
-static void wordchrs(struct vars *);
-static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
-static void freesubre(struct vars *, struct subre *);
-static void freesrnode(struct vars *, struct subre *);
-static void optst(struct vars *, struct subre *);
-static int numst(struct subre *, int);
-static void markst(struct subre *);
-static void cleanst(struct vars *);
-static long nfatree(struct vars *, struct subre *, FILE *);
-static long nfanode(struct vars *, struct subre *, FILE *);
-static int newlacon(struct vars *, struct state *, struct state *, int);
-static void freelacons(struct subre *, int);
-static void rfree(regex_t *);
-
-#ifdef REG_DEBUG
-static void dump(regex_t *, FILE *);
-static void dumpst(struct subre *, FILE *, int);
-static void stdump(struct subre *, FILE *, int);
-static char *stid(struct subre *, char *, size_t);
-#endif
-/* === regc_lex.c === */
-static void lexstart(struct vars *);
-static void prefixes(struct vars *);
-static void lexnest(struct vars *, chr *, chr *);
-static void lexword(struct vars *);
-static int next(struct vars *);
-static int lexescape(struct vars *);
-static chr lexdigits(struct vars *, int, int, int);
-static int brenext(struct vars *, chr);
-static void skip(struct vars *);
-static chr newline(void);
-static chr chrnamed(struct vars *, chr *, chr *, chr);
-
-/* === regc_color.c === */
-static void initcm(struct vars *, struct colormap *);
-static void freecm(struct colormap *);
-static void cmtreefree(struct colormap *, union tree *, int);
-static color setcolor(struct colormap *, chr, pcolor);
-static color maxcolor(struct colormap *);
-static color newcolor(struct colormap *);
-static void freecolor(struct colormap *, pcolor);
-static color pseudocolor(struct colormap *);
-static color subcolor(struct colormap *, chr c);
-static color newsub(struct colormap *, pcolor);
-static void subrange(struct vars *, chr, chr, struct state *, struct state *);
-static void subblock(struct vars *, chr, struct state *, struct state *);
-static void okcolors(struct nfa *, struct colormap *);
-static void colorchain(struct colormap *, struct arc *);
-static void uncolorchain(struct colormap *, struct arc *);
-static int singleton(struct colormap *, chr c);
-static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
-static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
-
-#ifdef REG_DEBUG
-static void dumpcolors(struct colormap *, FILE *);
-static void fillcheck(struct colormap *, union tree *, int, FILE *);
-static void dumpchr(chr, FILE *);
-#endif
-/* === regc_nfa.c === */
-static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
-static void freenfa(struct nfa *);
-static struct state *newstate(struct nfa *);
-static struct state *newfstate(struct nfa *, int flag);
-static void dropstate(struct nfa *, struct state *);
-static void freestate(struct nfa *, struct state *);
-static void destroystate(struct nfa *, struct state *);
-static void newarc(struct nfa *, int, pcolor, struct state *, struct state *);
-static struct arc *allocarc(struct nfa *, struct state *);
-static void freearc(struct nfa *, struct arc *);
-static struct arc *findarc(struct state *, int, pcolor);
-static void cparc(struct nfa *, struct arc *, struct state *, struct state *);
-static void moveins(struct nfa *, struct state *, struct state *);
-static void copyins(struct nfa *, struct state *, struct state *);
-static void moveouts(struct nfa *, struct state *, struct state *);
-static void copyouts(struct nfa *, struct state *, struct state *);
-static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
-static void delsub(struct nfa *, struct state *, struct state *);
-static void deltraverse(struct nfa *, struct state *, struct state *);
-static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
-static void duptraverse(struct nfa *, struct state *, struct state *);
-static void cleartraverse(struct nfa *, struct state *);
-static void specialcolors(struct nfa *);
-static long optimize(struct nfa *, FILE *);
-static void pullback(struct nfa *, FILE *);
-static int pull(struct nfa *, struct arc *);
-static void pushfwd(struct nfa *, FILE *);
-static int push(struct nfa *, struct arc *);
-
-#define INCOMPATIBLE 1 /* destroys arc */
-#define SATISFIED 2 /* constraint satisfied */
-#define COMPATIBLE 3 /* compatible but not satisfied yet */
-static int combine(struct arc *, struct arc *);
-static void fixempties(struct nfa *, FILE *);
-static int unempty(struct nfa *, struct arc *);
-static void cleanup(struct nfa *);
-static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
-static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
-static long analyze(struct nfa *);
-static void compact(struct nfa *, struct cnfa *);
-static void carcsort(struct carc *, struct carc *);
-static void freecnfa(struct cnfa *);
-static void dumpnfa(struct nfa *, FILE *);
-
-#ifdef REG_DEBUG
-static void dumpstate(struct state *, FILE *);
-static void dumparcs(struct state *, FILE *);
-static int dumprarcs(struct arc *, struct state *, FILE *, int);
-static void dumparc(struct arc *, struct state *, FILE *);
-static void dumpcnfa(struct cnfa *, FILE *);
-static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
-#endif
-/* === regc_cvec.c === */
-static struct cvec *newcvec(int, int, int);
-static struct cvec *clearcvec(struct cvec *);
-static void addchr(struct cvec *, chr);
-static void addrange(struct cvec *, chr, chr);
-static void addmcce(struct cvec *, chr *, chr *);
-static int haschr(struct cvec *, chr);
-static struct cvec *getcvec(struct vars *, int, int, int);
-static void freecvec(struct cvec *);
-
-/* === regc_locale.c === */
-static int wx_isdigit(wx_wchar c);
-static int wx_isalpha(wx_wchar c);
-static int wx_isalnum(wx_wchar c);
-static int wx_isupper(wx_wchar c);
-static int wx_islower(wx_wchar c);
-static int wx_isgraph(wx_wchar c);
-static int wx_ispunct(wx_wchar c);
-static int wx_isspace(wx_wchar c);
-static wx_wchar wx_toupper(wx_wchar c);
-static wx_wchar wx_tolower(wx_wchar c);
-static int nmcces(struct vars *);
-static int nleaders(struct vars *);
-static struct cvec *allmcces(struct vars *, struct cvec *);
-static celt element(struct vars *, chr *, chr *);
-static struct cvec *range(struct vars *, celt, celt, int);
-static int before(celt, celt);
-static struct cvec *eclass(struct vars *, celt, int);
-static struct cvec *cclass(struct vars *, chr *, chr *, int);
-static struct cvec *allcases(struct vars *, chr);
-static int cmp(const chr *, const chr *, size_t);
-static int casecmp(const chr *, const chr *, size_t);
-
-
-/* internal variables, bundled for easy passing around */
-struct vars
-{
- regex_t *re;
- chr *now; /* scan pointer into string */
- chr *stop; /* end of string */
- chr *savenow; /* saved now and stop for "subroutine
- * call" */
- chr *savestop;
- int err; /* error code (0 if none) */
- int cflags; /* copy of compile flags */
- int lasttype; /* type of previous token */
- int nexttype; /* type of next token */
- chr nextvalue; /* value (if any) of next token */
- int lexcon; /* lexical context type (see lex.c) */
- int nsubexp; /* subexpression count */
- struct subre **subs; /* subRE pointer vector */
- size_t nsubs; /* length of vector */
- struct subre *sub10[10]; /* initial vector, enough for most */
- struct nfa *nfa; /* the NFA */
- struct colormap *cm; /* character color map */
- color nlcolor; /* color of newline */
- struct state *wordchrs; /* state in nfa holding word-char outarcs */
- struct subre *tree; /* subexpression tree */
- struct subre *treechain; /* all tree nodes allocated */
- struct subre *treefree; /* any free tree nodes */
- int ntree; /* number of tree nodes */
- struct cvec *cv; /* interface cvec */
- struct cvec *cv2; /* utility cvec */
- struct cvec *mcces; /* collating-element information */
-#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c)))
- struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
- struct state *mccepend; /* in nfa, end of MCCE prototypes */
- struct subre *lacons; /* lookahead-constraint vector */
- int nlacons; /* size of lacons */
-};
-
-/* parsing macros; most know that `v' is the struct vars pointer */
-#define NEXT() (next(v)) /* advance by one token */
-#define SEE(t) (v->nexttype == (t)) /* is next token this? */
-#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */
-#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
-#define ISERR() VISERR(v)
-#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\
- ((vv)->err = (e)))
-#define ERR(e) VERR(v, e) /* record an error */
-#define NOERR() {if (ISERR()) return;} /* if error seen, return */
-#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
-#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */
-#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false,
- * error */
-#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */
-#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
-
-/* token type codes, some also used as NFA arc types */
-#define EMPTY 'n' /* no token present */
-#define EOS 'e' /* end of string */
-#define PLAIN 'p' /* ordinary character */
-#define DIGIT 'd' /* digit (in bound) */
-#define BACKREF 'b' /* back reference */
-#define COLLEL 'I' /* start of [. */
-#define ECLASS 'E' /* start of [= */
-#define CCLASS 'C' /* start of [: */
-#define END 'X' /* end of [. [= [: */
-#define RANGE 'R' /* - within [] which might be range delim. */
-#define LACON 'L' /* lookahead constraint subRE */
-#define AHEAD 'a' /* color-lookahead arc */
-#define BEHIND 'r' /* color-lookbehind arc */
-#define WBDRY 'w' /* word boundary constraint */
-#define NWBDRY 'W' /* non-word-boundary constraint */
-#define SBEGIN 'A' /* beginning of string (even if not BOL) */
-#define SEND 'Z' /* end of string (even if not EOL) */
-#define PREFER 'P' /* length preference */
-
-/* is an arc colored, and hence on a color chain? */
-#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \
- (a)->type == BEHIND)
-
-
-
-/* static function list */
-static struct fns functions = {
- rfree, /* regfree insides */
-};
-
-
-
-/*
- * regcomp - compile regular expression
- */
-int
-regcomp(regex_t *re,
- const chr *string,
- int flags)
-{
-
- size_t nLen = 0;
- chr* s2 = (chr*) string;
-
- if (string && *string)
- {
- while(*++s2);
- }
-
- nLen = ((s2 - string) / sizeof(chr));
-
- return wx_regcomp(re, string, nLen, flags);
-}
-int
-wx_regcomp(regex_t *re,
- const chr *string,
- size_t len,
- int flags)
-{
- struct vars var;
- struct vars *v = &var;
- struct guts *g;
- int i;
- size_t j;
-
-#ifdef REG_DEBUG
- FILE *debug = (flags & REG_PROGRESS) ? stdout : (FILE *) NULL;
-
+#if defined(__MWERKS__) && !defined(__MACH__)
+typedef long off_t ;
#else
- FILE *debug = (FILE *) NULL;
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include "regex.h"
+
+#include "utils.h"
+#include "regex2.h"
+
+#include "cclass.h"
+#include "cname.h"
+
+/*
+ * parse structure, passed up and down to avoid global variables and
+ * other clumsinesses
+ */
+struct parse {
+ char *next; /* next character in RE */
+ char *end; /* end of string (-> NUL normally) */
+ int error; /* has an error been seen? */
+ sop *strip; /* malloced strip */
+ sopno ssize; /* malloced strip size (allocated) */
+ sopno slen; /* malloced strip length (used) */
+ int ncsalloc; /* number of csets allocated */
+ struct re_guts *g;
+# define NPAREN 10 /* we need to remember () 1-9 for back refs */
+ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
+ sopno pend[NPAREN]; /* -> ) ([0] unused) */
+};
+
+#include "regcomp.ih"
+
+static char nuls[10]; /* place to point scanner in event of error */
+
+/*
+ * macros for use with parse structure
+ * BEWARE: these know that the parse structure is named `p' !!!
+ */
+#define PEEK() (*p->next)
+#define PEEK2() (*(p->next+1))
+#define MORE() (p->next < p->end)
+#define MORE2() (p->next+1 < p->end)
+#define SEE(c) (MORE() && PEEK() == (c))
+#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
+#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define NEXT() (p->next++)
+#define NEXT2() (p->next += 2)
+#define NEXTn(n) (p->next += (n))
+#define GETNEXT() (*p->next++)
+#define SETERROR(e) seterr(p, (e))
+#define REQUIRE(co, e) ((void)((co) || SETERROR(e)))
+#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
+#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
+#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
+#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
+#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
+#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
+#define HERE() (p->slen)
+#define THERE() (p->slen - 1)
+#define THERETHERE() (p->slen - 2)
+#define DROP(n) (p->slen -= (n))
+
+#ifndef NDEBUG
+static int never = 0; /* for use in asserts; shuts lint up */
+#else
+#define never 0 /* some s have bugs too */
#endif
-#define CNOERR() { if (ISERR()) return freev(v, v->err); }
+/*
+ - regcomp - interface for parser and compilation
+ = extern int regcomp(regex_t *, const char *, int);
+ = #define REG_BASIC 0000
+ = #define REG_EXTENDED 0001
+ = #define REG_ICASE 0002
+ = #define REG_NOSUB 0004
+ = #define REG_NEWLINE 0010
+ = #define REG_NOSPEC 0020
+ = #define REG_PEND 0040
+ = #define REG_DUMP 0200
+ */
+int /* 0 success, otherwise REG_something */
+regcomp(preg, pattern, cflags)
+regex_t *preg;
+const char *pattern;
+int cflags;
+{
+ struct parse pa;
+ register struct re_guts *g;
+ register struct parse *p = &pa;
+ register int i;
+ register size_t len;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&~REG_DUMP)
+#endif
- /* sanity checks */
+ cflags = GOODFLAGS(cflags);
+ if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
+ return(REG_INVARG);
- if (re == NULL || string == NULL)
- return REG_INVARG;
- if ((flags & REG_QUOTE) &&
- (flags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)))
- return REG_INVARG;
- if (!(flags & REG_EXTENDED) && (flags & REG_ADVF))
- return REG_INVARG;
+ if (cflags®_PEND) {
+ if (preg->re_endp < pattern)
+ return(REG_INVARG);
+ len = preg->re_endp - pattern;
+ } else
+ len = strlen((char *)pattern);
- /* initial setup (after which freev() is callable) */
- v->re = re;
- v->now = (chr *) string;
- v->stop = v->now + len;
- v->savenow = v->savestop = NULL;
- v->err = 0;
- v->cflags = flags;
- v->nsubexp = 0;
- v->subs = v->sub10;
- v->nsubs = 10;
- for (j = 0; j < v->nsubs; j++)
- v->subs[j] = NULL;
- v->nfa = NULL;
- v->cm = NULL;
- v->nlcolor = COLORLESS;
- v->wordchrs = NULL;
- v->tree = NULL;
- v->treechain = NULL;
- v->treefree = NULL;
- v->cv = NULL;
- v->cv2 = NULL;
- v->mcces = NULL;
- v->lacons = NULL;
- v->nlacons = 0;
- re->re_magic = REMAGIC;
- re->re_info = 0; /* bits get set during parse */
- re->re_csize = sizeof(chr);
- re->re_guts = NULL;
- re->re_fns = VS(&functions);
-
- /* more complex setup, malloced things */
- re->re_guts = VS(MALLOC(sizeof(struct guts)));
- if (re->re_guts == NULL)
- return freev(v, REG_ESPACE);
- g = (struct guts *) re->re_guts;
- g->tree = NULL;
- initcm(v, &g->cmap);
- v->cm = &g->cmap;
- g->lacons = NULL;
- g->nlacons = 0;
- ZAPCNFA(g->search);
- v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);
- CNOERR();
- v->cv = newcvec(100, 20, 10);
- if (v->cv == NULL)
- return freev(v, REG_ESPACE);
- i = nmcces(v);
- if (i > 0)
- {
- v->mcces = newcvec(nleaders(v), 0, i);
- CNOERR();
- v->mcces = allmcces(v, v->mcces);
- leaders(v, v->mcces);
- addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */
+ /* do the mallocs early so failure handling is easy */
+ g = (struct re_guts *)malloc(sizeof(struct re_guts) +
+ (NC-1)*sizeof(cat_t));
+ if (g == NULL)
+ return(REG_ESPACE);
+ p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
+ p->strip = (sop *)malloc(p->ssize * sizeof(sop));
+ p->slen = 0;
+ if (p->strip == NULL) {
+ free((char *)g);
+ return(REG_ESPACE);
}
- CNOERR();
- /* parsing */
- lexstart(v); /* also handles prefixes */
- if ((v->cflags & REG_NLSTOP) || (v->cflags & REG_NLANCH))
- {
- /* assign newline a unique color */
- v->nlcolor = subcolor(v->cm, newline());
- okcolors(v->nfa, v->cm);
+ /* set things up */
+ p->g = g;
+ p->next = (char *)pattern; /* convenience; we do not modify it */
+ p->end = p->next + len;
+ p->error = 0;
+ p->ncsalloc = 0;
+ for (i = 0; i < NPAREN; i++) {
+ p->pbegin[i] = 0;
+ p->pend[i] = 0;
}
- CNOERR();
- v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
- assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */
- CNOERR();
- assert(v->tree != NULL);
+ g->csetsize = NC;
+ g->sets = NULL;
+ g->setbits = NULL;
+ g->ncsets = 0;
+ g->cflags = cflags;
+ g->iflags = 0;
+ g->nbol = 0;
+ g->neol = 0;
+ g->must = NULL;
+ g->mlen = 0;
+ g->nsub = 0;
+ g->ncategories = 1; /* category 0 is "everything else" */
+ g->categories = &g->catspace[-(CHAR_MIN)];
+ (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
+ g->backrefs = 0;
- /* finish setup of nfa and its subre tree */
- specialcolors(v->nfa);
- CNOERR();
-#ifdef REG_DEBUG
- if (debug != NULL)
- {
- fprintf(debug, "\n\n\n========= RAW ==========\n");
- dumpnfa(v->nfa, debug);
- dumpst(v->tree, debug, 1);
- }
-#endif
- optst(v, v->tree);
- v->ntree = numst(v->tree, 1);
- markst(v->tree);
- cleanst(v);
-#ifdef REG_DEBUG
- if (debug != NULL)
- {
- fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
- dumpst(v->tree, debug, 1);
- }
+ /* do it */
+ EMIT(OEND, 0);
+ g->firststate = THERE();
+ if (cflags®_EXTENDED)
+ p_ere(p, OUT);
+ else if (cflags®_NOSPEC)
+ p_str(p);
+ else
+ p_bre(p, OUT, OUT);
+ EMIT(OEND, 0);
+ g->laststate = THERE();
+
+ /* tidy up loose ends and fill things in */
+ categorize(p, g);
+ stripsnug(p, g);
+ findmust(p, g);
+ g->nplus = pluscount(p, g);
+ g->magic = MAGIC2;
+ preg->re_nsub = g->nsub;
+ preg->re_g = g;
+ preg->re_magic = MAGIC1;
+#ifndef REDEBUG
+ /* not debugging, so can't rely on the assert() in regexec() */
+ if (g->iflags&BAD)
+ SETERROR(REG_ASSERT);
#endif
- /* build compacted NFAs for tree and lacons */
- re->re_info |= nfatree(v, v->tree, debug);
- CNOERR();
- assert(v->nlacons == 0 || v->lacons != NULL);
- for (i = 1; i < v->nlacons; i++)
- {
-#ifdef REG_DEBUG
- if (debug != NULL)
- fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
-#endif
- nfanode(v, &v->lacons[i], debug);
- }
- CNOERR();
- if (v->tree->flags & SHORTER)
- NOTE(REG_USHORTEST);
-
- /* build compacted NFAs for tree, lacons, fast search */
-#ifdef REG_DEBUG
- if (debug != NULL)
- fprintf(debug, "\n\n\n========= SEARCH ==========\n");
-#endif
- /* can sacrifice main NFA now, so use it as work area */
- (DISCARD) optimize(v->nfa, debug);
- CNOERR();
- makesearch(v, v->nfa);
- CNOERR();
- compact(v->nfa, &g->search);
- CNOERR();
-
- /* looks okay, package it up */
- re->re_nsub = v->nsubexp;
- v->re = NULL; /* freev no longer frees re */
- g->magic = GUTSMAGIC;
- g->cflags = v->cflags;
- g->info = re->re_info;
- g->nsub = re->re_nsub;
- g->tree = v->tree;
- v->tree = NULL;
- g->ntree = v->ntree;
- g->compare = (v->cflags & REG_ICASE) ? casecmp : cmp;
- g->lacons = v->lacons;
- v->lacons = NULL;
- g->nlacons = v->nlacons;
-
-#ifdef REG_DEBUG
- if (flags & REG_DUMP)
- dump(re, stdout);
-#endif
-
- assert(v->err == 0);
- return freev(v, 0);
+ /* win or lose, we're done */
+ if (p->error != 0) /* lose */
+ regfree(preg);
+ return(p->error);
}
/*
- * moresubs - enlarge subRE vector
+ - p_ere - ERE parser top level, concatenation and alternation
+ == static void p_ere(register struct parse *p, int stop);
*/
static void
-moresubs(struct vars * v,
- int wanted) /* want enough room for this one */
+p_ere(p, stop)
+register struct parse *p;
+int stop; /* character this ERE should end at */
{
- struct subre **p;
- size_t n;
+ register char c;
+ register sopno prevback;
+ register sopno prevfwd;
+ register sopno conc;
+ register int first = 1; /* is this the first alternative? */
- assert(wanted > 0 && (size_t) wanted >= v->nsubs);
- n = (size_t) wanted *3 / 2 + 1;
+ for (;;) {
+ /* do a bunch of concatenated expressions */
+ conc = HERE();
+ while (MORE() && (c = PEEK()) != '|' && c != stop)
+ p_ere_exp(p);
+ REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
- if (v->subs == v->sub10)
- {
- p = (struct subre **) MALLOC(n * sizeof(struct subre *));
- if (p != NULL)
- memcpy(VS(p), VS(v->subs),
- v->nsubs * sizeof(struct subre *));
+ if (!EAT('|'))
+ break; /* NOTE BREAK OUT */
+
+ if (first) {
+ INSERT(OCH_, conc); /* offset is wrong */
+ prevfwd = conc;
+ prevback = conc;
+ first = 0;
}
- else
- p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *));
- if (p == NULL)
- {
- ERR(REG_ESPACE);
- return;
+ ASTERN(OOR1, prevback);
+ prevback = THERE();
+ AHEAD(prevfwd); /* fix previous offset */
+ prevfwd = HERE();
+ EMIT(OOR2, 0); /* offset is very wrong */
}
- v->subs = p;
- for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)
- *p = NULL;
- assert(v->nsubs == n);
- assert((size_t) wanted < v->nsubs);
+
+ if (!first) { /* tail-end fixups */
+ AHEAD(prevfwd);
+ ASTERN(O_CH, prevback);
+ }
+
+ assert(!MORE() || SEE(stop));
}
/*
- * freev - free vars struct's substructures where necessary
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ == static void p_ere_exp(register struct parse *p);
+ */
+static void
+p_ere_exp(p)
+register struct parse *p;
+{
+ register char c;
+ register sopno pos;
+ register int count;
+ register int count2;
+ register sopno subno;
+ int wascaret = 0;
+
+ assert(MORE()); /* caller should have ensured this */
+ c = GETNEXT();
+
+ pos = HERE();
+ switch (c) {
+ case '(':
+ REQUIRE(MORE(), REG_EPAREN);
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ if (!SEE(')'))
+ p_ere(p, ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ MUSTEAT(')', REG_EPAREN);
+ break;
+#ifndef POSIX_MISTAKE
+ case ')': /* happens only if no current unmatched ( */
+ /*
+ * You may ask, why the ifndef? Because I didn't notice
+ * this until slightly too late for 1003.2, and none of the
+ * other 1003.2 regular-expression reviewers noticed it at
+ * all. So an unmatched ) is legal POSIX, at least until
+ * we can get it fixed.
+ */
+ SETERROR(REG_EPAREN);
+ break;
+#endif
+ case '^':
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ wascaret = 1;
+ break;
+ case '$':
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ break;
+ case '|':
+ SETERROR(REG_EMPTY);
+ break;
+ case '*':
+ case '+':
+ case '?':
+ SETERROR(REG_BADRPT);
+ break;
+ case '.':
+ if (p->g->cflags®_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case '\\':
+ REQUIRE(MORE(), REG_EESCAPE);
+ c = GETNEXT();
+ ordinary(p, c);
+ break;
+ case '{': /* okay as ordinary except if digit follows */
+ REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, c);
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ /* we call { a repetition if followed by a digit */
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit(PEEK2())) ))
+ return; /* no repetition, we're done */
+ NEXT();
+
+ REQUIRE(!wascaret, REG_BADRPT);
+ switch (c) {
+ case '*': /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ break;
+ case '+':
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ break;
+ case '?':
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, pos); /* offset slightly wrong */
+ ASTERN(OOR1, pos); /* this one's right */
+ AHEAD(pos); /* fix the OCH_ */
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case '{':
+ count = p_count(p);
+ if (EAT(',')) {
+ if (isdigit(PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EAT('}')) { /* error heuristics */
+ while (MORE() && PEEK() != '}')
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ break;
+ }
+
+ if (!MORE())
+ return;
+ c = PEEK();
+ if (!( c == '*' || c == '+' || c == '?' ||
+ (c == '{' && MORE2() && isdigit(PEEK2())) ) )
+ return;
+ SETERROR(REG_BADRPT);
+}
+
+/*
+ - p_str - string (no metacharacters) "parser"
+ == static void p_str(register struct parse *p);
+ */
+static void
+p_str(p)
+register struct parse *p;
+{
+ REQUIRE(MORE(), REG_EMPTY);
+ while (MORE())
+ ordinary(p, GETNEXT());
+}
+
+/*
+ - p_bre - BRE parser top level, anchoring and concatenation
+ == static void p_bre(register struct parse *p, register int end1, \
+ == register int end2);
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
*
- * Optionally does error-number setting, and always returns error code
- * (if any), to make error-handling code terser.
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor. The
+ * only undesirable side effect is that '$' gets included as a character
+ * category in such cases. This is fairly harmless; not worth fixing.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_bre(p, end1, end2)
+register struct parse *p;
+register int end1; /* first terminating character */
+register int end2; /* second terminating character */
+{
+ register sopno start = HERE();
+ register int first = 1; /* first subexpression? */
+ register int wasdollar = 0;
+
+ if (EAT('^')) {
+ EMIT(OBOL, 0);
+ p->g->iflags |= USEBOL;
+ p->g->nbol++;
+ }
+ while (MORE() && !SEETWO(end1, end2)) {
+ wasdollar = p_simp_re(p, first);
+ first = 0;
+ }
+ if (wasdollar) { /* oops, that was a trailing anchor */
+ DROP(1);
+ EMIT(OEOL, 0);
+ p->g->iflags |= USEEOL;
+ p->g->neol++;
+ }
+
+ REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
+}
+
+/*
+ - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
+ == static int p_simp_re(register struct parse *p, int starordinary);
+ */
+static int /* was the simple RE an unbackslashed $? */
+p_simp_re(p, starordinary)
+register struct parse *p;
+int starordinary; /* is a leading * an ordinary character? */
+{
+ register int c;
+ register int count;
+ register int count2;
+ register sopno pos;
+ register int i;
+ register sopno subno;
+# define BACKSL (1<g->cflags®_NEWLINE)
+ nonnewline(p);
+ else
+ EMIT(OANY, 0);
+ break;
+ case '[':
+ p_bracket(p);
+ break;
+ case BACKSL|'{':
+ SETERROR(REG_BADRPT);
+ break;
+ case BACKSL|'(':
+ p->g->nsub++;
+ subno = p->g->nsub;
+ if (subno < NPAREN)
+ p->pbegin[subno] = HERE();
+ EMIT(OLPAREN, subno);
+ /* the MORE here is an error heuristic */
+ if (MORE() && !SEETWO('\\', ')'))
+ p_bre(p, '\\', ')');
+ if (subno < NPAREN) {
+ p->pend[subno] = HERE();
+ assert(p->pend[subno] != 0);
+ }
+ EMIT(ORPAREN, subno);
+ REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+ break;
+ case BACKSL|')': /* should not get here -- must be user */
+ case BACKSL|'}':
+ SETERROR(REG_EPAREN);
+ break;
+ case BACKSL|'1':
+ case BACKSL|'2':
+ case BACKSL|'3':
+ case BACKSL|'4':
+ case BACKSL|'5':
+ case BACKSL|'6':
+ case BACKSL|'7':
+ case BACKSL|'8':
+ case BACKSL|'9':
+ i = (c&~BACKSL) - '0';
+ assert(i < NPAREN);
+ if (p->pend[i] != 0) {
+ assert(i <= p->g->nsub);
+ EMIT(OBACK_, i);
+ assert(p->pbegin[i] != 0);
+ assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+ assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+ (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+ EMIT(O_BACK, i);
+ } else
+ SETERROR(REG_ESUBREG);
+ p->g->backrefs = 1;
+ break;
+ case '*':
+ REQUIRE(starordinary, REG_BADRPT);
+ /* FALLTHROUGH */
+ default:
+ ordinary(p, (char)c); /* takes off BACKSL, if any */
+ break;
+ }
+
+ if (EAT('*')) { /* implemented as +? */
+ /* this case does not require the (y|) trick, noKLUDGE */
+ INSERT(OPLUS_, pos);
+ ASTERN(O_PLUS, pos);
+ INSERT(OQUEST_, pos);
+ ASTERN(O_QUEST, pos);
+ } else if (EATTWO('\\', '{')) {
+ count = p_count(p);
+ if (EAT(',')) {
+ if (MORE() && isdigit(PEEK())) {
+ count2 = p_count(p);
+ REQUIRE(count <= count2, REG_BADBR);
+ } else /* single number with comma */
+ count2 = INFINITY;
+ } else /* just a single number */
+ count2 = count;
+ repeat(p, pos, count, count2);
+ if (!EATTWO('\\', '}')) { /* error heuristics */
+ while (MORE() && !SEETWO('\\', '}'))
+ NEXT();
+ REQUIRE(MORE(), REG_EBRACE);
+ SETERROR(REG_BADBR);
+ }
+ } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
+ return(1);
+
+ return(0);
+}
+
+/*
+ - p_count - parse a repetition count
+ == static int p_count(register struct parse *p);
+ */
+static int /* the value */
+p_count(p)
+register struct parse *p;
+{
+ register int count = 0;
+ register int ndigits = 0;
+
+ while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
+ count = count*10 + (GETNEXT() - '0');
+ ndigits++;
+ }
+
+ REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+ return(count);
+}
+
+/*
+ - p_bracket - parse a bracketed character list
+ == static void p_bracket(register struct parse *p);
+ *
+ * Note a significant property of this code: if the allocset() did SETERROR,
+ * no set operations are done.
+ */
+static void
+p_bracket(p)
+register struct parse *p;
+{
+ register cset *cs = allocset(p);
+ register int invert = 0;
+
+ /* Dept of Truly Sickening Special-Case Kludges */
+ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+ EMIT(OBOW, 0);
+ NEXTn(6);
+ return;
+ }
+ if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+ EMIT(OEOW, 0);
+ NEXTn(6);
+ return;
+ }
+
+ if (EAT('^'))
+ invert++; /* make note to invert set at end */
+ if (EAT(']'))
+ CHadd(cs, ']');
+ else if (EAT('-'))
+ CHadd(cs, '-');
+ while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+ p_b_term(p, cs);
+ if (EAT('-'))
+ CHadd(cs, '-');
+ MUSTEAT(']', REG_EBRACK);
+
+ if (p->error != 0) /* don't mess things up further */
+ return;
+
+ if (p->g->cflags®_ICASE) {
+ register int i;
+ register int ci;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i) && isalpha(i)) {
+ ci = othercase(i);
+ if (ci != i)
+ CHadd(cs, ci);
+ }
+ if (cs->multis != NULL)
+ mccase(p, cs);
+ }
+ if (invert) {
+ register int i;
+
+ for (i = p->g->csetsize - 1; i >= 0; i--)
+ if (CHIN(cs, i))
+ CHsub(cs, i);
+ else
+ CHadd(cs, i);
+ if (p->g->cflags®_NEWLINE)
+ CHsub(cs, '\n');
+ if (cs->multis != NULL)
+ mcinvert(p, cs);
+ }
+
+ assert(cs->multis == NULL); /* xxx */
+
+ if (nch(p, cs) == 1) { /* optimize singleton sets */
+ ordinary(p, firstch(p, cs));
+ freeset(p, cs);
+ } else
+ EMIT(OANYOF, freezeset(p, cs));
+}
+
+/*
+ - p_b_term - parse one term of a bracketed character list
+ == static void p_b_term(register struct parse *p, register cset *cs);
+ */
+static void
+p_b_term(p, cs)
+register struct parse *p;
+register cset *cs;
+{
+ register char c;
+ register char start, finish;
+ register int i;
+
+ /* classify what we've got */
+ switch ((MORE()) ? PEEK() : '\0') {
+ case '[':
+ c = (MORE2()) ? PEEK2() : '\0';
+ break;
+ case '-':
+ SETERROR(REG_ERANGE);
+ return; /* NOTE RETURN */
+ break;
+ default:
+ c = '\0';
+ break;
+ }
+
+ switch (c) {
+ case ':': /* character class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+ p_b_cclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+ break;
+ case '=': /* equivalence class */
+ NEXT2();
+ REQUIRE(MORE(), REG_EBRACK);
+ c = PEEK();
+ REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+ p_b_eclass(p, cs);
+ REQUIRE(MORE(), REG_EBRACK);
+ REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+ break;
+ default: /* symbol, ordinary character, or range */
+/* xxx revision needed for multichar stuff */
+ start = p_b_symbol(p);
+ if (SEE('-') && MORE2() && PEEK2() != ']') {
+ /* range */
+ NEXT();
+ if (EAT('-'))
+ finish = '-';
+ else
+ finish = p_b_symbol(p);
+ } else
+ finish = start;
+/* xxx what about signed chars here... */
+ REQUIRE(start <= finish, REG_ERANGE);
+ for (i = start; i <= finish; i++)
+ CHadd(cs, i);
+ break;
+ }
+}
+
+/*
+ - p_b_cclass - parse a character-class name and deal with it
+ == static void p_b_cclass(register struct parse *p, register cset *cs);
+ */
+static void
+p_b_cclass(p, cs)
+register struct parse *p;
+register cset *cs;
+{
+ register char *sp = p->next;
+ register struct cclass *cp;
+ register size_t len;
+ register char *u;
+ register char c;
+
+ while (MORE() && isalpha(PEEK()))
+ NEXT();
+ len = p->next - sp;
+ for (cp = cclasses; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ break;
+ if (cp->name == NULL) {
+ /* oops, didn't find it */
+ SETERROR(REG_ECTYPE);
+ return;
+ }
+
+ u = cp->chars;
+ while ((c = *u++) != '\0')
+ CHadd(cs, c);
+ for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
+ MCadd(p, cs, u);
+}
+
+/*
+ - p_b_eclass - parse an equivalence-class name and deal with it
+ == static void p_b_eclass(register struct parse *p, register cset *cs);
+ *
+ * This implementation is incomplete. xxx
+ */
+static void
+p_b_eclass(p, cs)
+register struct parse *p;
+register cset *cs;
+{
+ register char c;
+
+ c = p_b_coll_elem(p, '=');
+ CHadd(cs, c);
+}
+
+/*
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ == static char p_b_symbol(register struct parse *p);
+ */
+static char /* value of symbol */
+p_b_symbol(p)
+register struct parse *p;
+{
+ register char value;
+
+ REQUIRE(MORE(), REG_EBRACK);
+ if (!EATTWO('[', '.'))
+ return(GETNEXT());
+
+ /* collating symbol */
+ value = p_b_coll_elem(p, '.');
+ REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+ return(value);
+}
+
+/*
+ - p_b_coll_elem - parse a collating-element name and look it up
+ == static char p_b_coll_elem(register struct parse *p, int endc);
+ */
+static char /* value of collating element */
+p_b_coll_elem(p, endc)
+register struct parse *p;
+int endc; /* name ended by endc,']' */
+{
+ register char *sp = p->next;
+ register struct cname *cp;
+ register int len;
+
+ while (MORE() && !SEETWO(endc, ']'))
+ NEXT();
+ if (!MORE()) {
+ SETERROR(REG_EBRACK);
+ return(0);
+ }
+ len = p->next - sp;
+ for (cp = cnames; cp->name != NULL; cp++)
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+ return(cp->code); /* known name */
+ if (len == 1)
+ return(*sp); /* single character */
+ SETERROR(REG_ECOLLATE); /* neither */
+ return(0);
+}
+
+/*
+ - othercase - return the case counterpart of an alphabetic
+ == static char othercase(int ch);
+ */
+static char /* if no counterpart, return ch */
+othercase(ch)
+int ch;
+{
+ assert(isalpha(ch));
+ if (isupper(ch))
+ return(tolower(ch));
+ else if (islower(ch))
+ return(toupper(ch));
+ else /* peculiar, but could happen */
+ return(ch);
+}
+
+/*
+ - bothcases - emit a dualcase version of a two-case character
+ == static void bothcases(register struct parse *p, int ch);
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+bothcases(p, ch)
+register struct parse *p;
+int ch;
+{
+ register char *oldnext = p->next;
+ register char *oldend = p->end;
+ char bracket[3];
+
+ assert(othercase(ch) != ch); /* p_bracket() would recurse */
+ p->next = bracket;
+ p->end = bracket+2;
+ bracket[0] = ch;
+ bracket[1] = ']';
+ bracket[2] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+2);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - ordinary - emit an ordinary character
+ == static void ordinary(register struct parse *p, register int ch);
+ */
+static void
+ordinary(p, ch)
+register struct parse *p;
+register int ch;
+{
+ register cat_t *cap = p->g->categories;
+
+ if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
+ bothcases(p, ch);
+ else {
+ EMIT(OCHAR, (unsigned char)ch);
+ if (cap[ch] == 0)
+ cap[ch] = p->g->ncategories++;
+ }
+}
+
+/*
+ - nonnewline - emit REG_NEWLINE version of OANY
+ == static void nonnewline(register struct parse *p);
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+nonnewline(p)
+register struct parse *p;
+{
+ register char *oldnext = p->next;
+ register char *oldend = p->end;
+ char bracket[4];
+
+ p->next = bracket;
+ p->end = bracket+3;
+ bracket[0] = '^';
+ bracket[1] = '\n';
+ bracket[2] = ']';
+ bracket[3] = '\0';
+ p_bracket(p);
+ assert(p->next == bracket+3);
+ p->next = oldnext;
+ p->end = oldend;
+}
+
+/*
+ - repeat - generate code for a bounded repetition, recursively if needed
+ == static void repeat(register struct parse *p, sopno start, int from, int to);
+ */
+static void
+repeat(p, start, from, to)
+register struct parse *p;
+sopno start; /* operand from here to end of strip */
+int from; /* repeated from this number */
+int to; /* to this number of times (maybe INFINITY) */
+{
+ register sopno finish = HERE();
+# define N 2
+# define INF 3
+# define REP(f, t) ((f)*8 + (t))
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
+ register sopno copy;
+
+ if (p->error != 0) /* head off possible runaway recursion */
+ return;
+
+ assert(from <= to);
+
+ switch (REP(MAP(from), MAP(to))) {
+ case REP(0, 0): /* must be user doing this */
+ DROP(finish-start); /* drop the operand */
+ break;
+ case REP(0, 1): /* as x{1,1}? */
+ case REP(0, N): /* as x{1,n}? */
+ case REP(0, INF): /* as x{1,}? */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start); /* offset is wrong... */
+ repeat(p, start+1, 1, to);
+ ASTERN(OOR1, start);
+ AHEAD(start); /* ... fix it */
+ EMIT(OOR2, 0);
+ AHEAD(THERE());
+ ASTERN(O_CH, THERETHERE());
+ break;
+ case REP(1, 1): /* trivial case */
+ /* done */
+ break;
+ case REP(1, N): /* as x?x{1,n-1} */
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+ INSERT(OCH_, start);
+ ASTERN(OOR1, start);
+ AHEAD(start);
+ EMIT(OOR2, 0); /* offset very wrong... */
+ AHEAD(THERE()); /* ...so fix it */
+ ASTERN(O_CH, THERETHERE());
+ copy = dupl(p, start+1, finish+1);
+ assert(copy == finish+4);
+ repeat(p, copy, 1, to-1);
+ break;
+ case REP(1, INF): /* as x+ */
+ INSERT(OPLUS_, start);
+ ASTERN(O_PLUS, start);
+ break;
+ case REP(N, N): /* as xx{m-1,n-1} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to-1);
+ break;
+ case REP(N, INF): /* as xx{n-1,INF} */
+ copy = dupl(p, start, finish);
+ repeat(p, copy, from-1, to);
+ break;
+ default: /* "can't happen" */
+ SETERROR(REG_ASSERT); /* just in case */
+ break;
+ }
+}
+
+/*
+ - seterr - set an error condition
+ == static int seterr(register struct parse *p, int e);
+ */
+static int /* useless but makes type checking happy */
+seterr(p, e)
+register struct parse *p;
+int e;
+{
+ if (p->error == 0) /* keep earliest error condition */
+ p->error = e;
+ p->next = nuls; /* try to bring things to a halt */
+ p->end = nuls;
+ return(0); /* make the return value well-defined */
+}
+
+/*
+ - allocset - allocate a set of characters for []
+ == static cset *allocset(register struct parse *p);
+ */
+static cset *
+allocset(p)
+register struct parse *p;
+{
+ register int no = p->g->ncsets++;
+ register size_t nc;
+ register size_t nbytes;
+ register cset *cs;
+ register size_t css = (size_t)p->g->csetsize;
+ register int i;
+
+ if (no >= p->ncsalloc) { /* need another column of space */
+ p->ncsalloc += CHAR_BIT;
+ nc = p->ncsalloc;
+ assert(nc % CHAR_BIT == 0);
+ nbytes = nc / CHAR_BIT * css;
+ if (p->g->sets == NULL)
+ p->g->sets = (cset *)malloc(nc * sizeof(cset));
+ else
+ p->g->sets = (cset *)realloc((char *)p->g->sets,
+ nc * sizeof(cset));
+ if (p->g->setbits == NULL)
+ p->g->setbits = (uch *)malloc(nbytes);
+ else {
+ p->g->setbits = (uch *)realloc((char *)p->g->setbits,
+ nbytes);
+ /* xxx this isn't right if setbits is now NULL */
+ for (i = 0; i < no; i++)
+ p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
+ }
+ if (p->g->sets != NULL && p->g->setbits != NULL)
+ (void) memset((char *)p->g->setbits + (nbytes - css),
+ 0, css);
+ else {
+ no = 0;
+ SETERROR(REG_ESPACE);
+ /* caller's responsibility not to do set ops */
+ }
+ }
+
+ assert(p->g->sets != NULL); /* xxx */
+ cs = &p->g->sets[no];
+ cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
+ cs->mask = 1 << ((no) % CHAR_BIT);
+ cs->hash = 0;
+ cs->smultis = 0;
+ cs->multis = NULL;
+
+ return(cs);
+}
+
+/*
+ - freeset - free a now-unused set
+ == static void freeset(register struct parse *p, register cset *cs);
+ */
+static void
+freeset(p, cs)
+register struct parse *p;
+register cset *cs;
+{
+ register size_t i;
+ register cset *top = &p->g->sets[p->g->ncsets];
+ register size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ CHsub(cs, i);
+ if (cs == top-1) /* recover only the easy case */
+ p->g->ncsets--;
+}
+
+/*
+ - freezeset - final processing on a set of characters
+ == static int freezeset(register struct parse *p, register cset *cs);
+ *
+ * The main task here is merging identical sets. This is usually a waste
+ * of time (although the hash code minimizes the overhead), but can win
+ * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
+ * is done using addition rather than xor -- all ASCII [aA] sets xor to
+ * the same value!
+ */
+static int /* set number */
+freezeset(p, cs)
+register struct parse *p;
+register cset *cs;
+{
+ register uch h = cs->hash;
+ register size_t i;
+ register cset *top = &p->g->sets[p->g->ncsets];
+ register cset *cs2;
+ register size_t css = (size_t)p->g->csetsize;
+
+ /* look for an earlier one which is the same */
+ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
+ if (cs2->hash == h && cs2 != cs) {
+ /* maybe */
+ for (i = 0; i < css; i++)
+ if (!!CHIN(cs2, i) != !!CHIN(cs, i))
+ break; /* no */
+ if (i == css)
+ break; /* yes */
+ }
+
+ if (cs2 < top) { /* found one */
+ freeset(p, cs);
+ cs = cs2;
+ }
+
+ return((int)(cs - p->g->sets));
+}
+
+/*
+ - firstch - return first character in a set (which must have at least one)
+ == static int firstch(register struct parse *p, register cset *cs);
+ */
+static int /* character; there is no "none" value */
+firstch(p, cs)
+register struct parse *p;
+register cset *cs;
+{
+ register size_t i;
+ register size_t css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ return((char)i);
+ assert(never);
+ return(0); /* arbitrary */
+}
+
+/*
+ - nch - number of characters in a set
+ == static int nch(register struct parse *p, register cset *cs);
*/
static int
-freev(struct vars * v,
- int err)
+nch(p, cs)
+register struct parse *p;
+register cset *cs;
{
- if (v->re != NULL)
- rfree(v->re);
- if (v->subs != v->sub10)
- FREE(v->subs);
- if (v->nfa != NULL)
- freenfa(v->nfa);
- if (v->tree != NULL)
- freesubre(v, v->tree);
- if (v->treechain != NULL)
- cleanst(v);
- if (v->cv != NULL)
- freecvec(v->cv);
- if (v->cv2 != NULL)
- freecvec(v->cv2);
- if (v->mcces != NULL)
- freecvec(v->mcces);
- if (v->lacons != NULL)
- freelacons(v->lacons, v->nlacons);
- ERR(err); /* nop if err==0 */
+ register size_t i;
+ register size_t css = (size_t)p->g->csetsize;
+ register int n = 0;
- return v->err;
+ for (i = 0; i < css; i++)
+ if (CHIN(cs, i))
+ n++;
+ return(n);
}
/*
- * makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
- * NFA must have been optimize()d already.
+ - mcadd - add a collating element to a cset
+ == static void mcadd(register struct parse *p, register cset *cs, \
+ == register char *cp);
*/
static void
-makesearch(struct vars * v,
- struct nfa * nfa)
+mcadd(p, cs, cp)
+register struct parse *p;
+register cset *cs;
+register char *cp;
{
- struct arc *a;
- struct arc *b;
- struct state *pre = nfa->pre;
- struct state *s;
- struct state *s2;
- struct state *slist;
+ register size_t oldend = cs->smultis;
- /* no loops are needed if it's anchored */
- for (a = pre->outs; a != NULL; a = a->outchain)
- {
- assert(a->type == PLAIN);
- if (a->co != nfa->bos[0] && a->co != nfa->bos[1])
- break;
- }
- if (a != NULL)
- {
- /* add implicit .* in front */
- rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
-
- /* and ^* and \A* too -- not always necessary, but harmless */
- newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
- newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
+ cs->smultis += strlen(cp) + 1;
+ if (cs->multis == NULL)
+ cs->multis = malloc(cs->smultis);
+ else
+ cs->multis = realloc(cs->multis, cs->smultis);
+ if (cs->multis == NULL) {
+ SETERROR(REG_ESPACE);
+ return;
}
- /*
- * Now here's the subtle part. Because many REs have no lookback
- * constraints, often knowing when you were in the pre state tells you
- * little; it's the next state(s) that are informative. But some of
- * them may have other inarcs, i.e. it may be possible to make actual
- * progress and then return to one of them. We must de-optimize such
- * cases, splitting each such state into progress and no-progress
- * states.
+ (void) strcpy(cs->multis + oldend - 1, cp);
+ cs->multis[cs->smultis - 1] = '\0';
+}
+
+/* these functions don't seem to be used (yet?), suppress warnings */
+#if 0
+/*
+ - mcsub - subtract a collating element from a cset
+ == static void mcsub(register cset *cs, register char *cp);
*/
+static void
+mcsub(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ register char *fp = mcfind(cs, cp);
+ register size_t len = strlen(fp);
- /* first, make a list of the states */
- slist = NULL;
- for (a = pre->outs; a != NULL; a = a->outchain)
- {
- s = a->to;
- for (b = s->ins; b != NULL; b = b->inchain)
- if (b->from != pre)
- break;
- if (b != NULL)
- { /* must be split */
- s->tmp = slist;
- slist = s;
- }
- }
+ assert(fp != NULL);
+ (void) memmove(fp, fp + len + 1,
+ cs->smultis - (fp + len + 1 - cs->multis));
+ cs->smultis -= len;
- /* do the splits */
- for (s = slist; s != NULL; s = s2)
- {
- s2 = newstate(nfa);
- copyouts(nfa, s, s2);
- for (a = s->ins; a != NULL; a = b)
- {
- b = a->inchain;
- if (a->from != pre)
- {
- cparc(nfa, a, a->from, s2);
- freearc(nfa, a);
- }
- }
- s2 = s->tmp;
- s->tmp = NULL; /* clean up while we're at it */
+ if (cs->smultis == 0) {
+ free(cs->multis);
+ cs->multis = NULL;
+ return;
}
+
+ cs->multis = realloc(cs->multis, cs->smultis);
+ assert(cs->multis != NULL);
}
/*
- * parse - parse an RE
+ - mcin - is a collating element in a cset?
+ == static int mcin(register cset *cs, register char *cp);
+ */
+static int
+mcin(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ return(mcfind(cs, cp) != NULL);
+}
+
+/*
+ - mcfind - find a collating element in a cset
+ == static char *mcfind(register cset *cs, register char *cp);
+ */
+static char *
+mcfind(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ register char *p;
+
+ if (cs->multis == NULL)
+ return(NULL);
+ for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
+ if (strcmp(cp, p) == 0)
+ return(p);
+ return(NULL);
+}
+#endif /* 0 */
+
+/*
+ - mcinvert - invert the list of collating elements in a cset
+ == static void mcinvert(register struct parse *p, register cset *cs);
*
- * This is actually just the top level, which parses a bunch of branches
- * tied together with '|'. They appear in the tree as the left children
- * of a chain of '|' subres.
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
*/
-static struct subre *
-parse(struct vars * v,
- int stopper, /* EOS or ')' */
- int type, /* LACON (lookahead subRE) or PLAIN */
- struct state * init, /* initial state */
- struct state * final) /* final state */
+static void
+mcinvert(p, cs)
+register struct parse *p;
+register cset *cs;
{
- struct state *left; /* scaffolding for branch */
- struct state *right;
- struct subre *branches; /* top level */
- struct subre *branch; /* current branch */
- struct subre *t; /* temporary */
- int firstbranch; /* is this the first branch? */
-
- assert(stopper == ')' || stopper == EOS);
-
- branches = subre(v, '|', LONGER, init, final);
- NOERRN();
- branch = branches;
- firstbranch = 1;
- do
- { /* a branch */
- if (!firstbranch)
- {
- /* need a place to hang it */
- branch->right = subre(v, '|', LONGER, init, final);
- NOERRN();
- branch = branch->right;
- }
- firstbranch = 0;
- left = newstate(v->nfa);
- right = newstate(v->nfa);
- NOERRN();
- EMPTYARC(init, left);
- EMPTYARC(right, final);
- NOERRN();
- branch->left = parsebranch(v, stopper, type, left, right, 0);
- NOERRN();
- branch->flags |= UP(branch->flags | branch->left->flags);
- if ((branch->flags & ~branches->flags) != 0) /* new flags */
- for (t = branches; t != branch; t = t->right)
- t->flags |= branch->flags;
- } while (EAT('|'));
- assert(SEE(stopper) || SEE(EOS));
-
- if (!SEE(stopper))
- {
- assert(stopper == ')' && SEE(EOS));
- ERR(REG_EPAREN);
- }
-
- /* optimize out simple cases */
- if (branch == branches)
- { /* only one branch */
- assert(branch->right == NULL);
- t = branch->left;
- branch->left = NULL;
- freesubre(v, branches);
- branches = t;
- }
- else if (!MESSY(branches->flags))
- { /* no interesting innards */
- freesubre(v, branches->left);
- branches->left = NULL;
- freesubre(v, branches->right);
- branches->right = NULL;
- branches->op = '=';
- }
-
- return branches;
+ assert(cs->multis == NULL); /* xxx */
}
/*
- * parsebranch - parse one branch of an RE
+ - mccase - add case counterparts of the list of collating elements in a cset
+ == static void mccase(register struct parse *p, register cset *cs);
*
- * This mostly manages concatenation, working closely with parseqatom().
- * Concatenated things are bundled up as much as possible, with separate
- * ',' nodes introduced only when necessary due to substructure.
+ * This would have to know the set of possibilities. Implementation
+ * is deferred.
*/
-static struct subre *
-parsebranch(struct vars * v,
- int stopper, /* EOS or ')' */
- int type, /* LACON (lookahead subRE) or PLAIN */
- struct state * left, /* leftmost state */
- struct state * right, /* rightmost state */
- int partial) /* is this only part of a branch? */
+static void
+mccase(p, cs)
+register struct parse *p;
+register cset *cs;
{
- struct state *lp; /* left end of current construct */
- int seencontent; /* is there anything in this branch yet? */
- struct subre *t;
-
- lp = left;
- seencontent = 0;
- t = subre(v, '=', 0, left, right); /* op '=' is tentative */
- NOERRN();
- while (!SEE('|') && !SEE(stopper) && !SEE(EOS))
- {
- if (seencontent)
- { /* implicit concat operator */
- lp = newstate(v->nfa);
- NOERRN();
- moveins(v->nfa, right, lp);
- }
- seencontent = 1;
-
- /* NB, recursion in parseqatom() may swallow rest of branch */
- parseqatom(v, stopper, type, lp, right, t);
- }
-
- if (!seencontent)
- { /* empty branch */
- if (!partial)
- NOTE(REG_UUNSPEC);
- assert(lp == left);
- EMPTYARC(left, right);
- }
-
- return t;
+ assert(cs->multis == NULL); /* xxx */
}
/*
- * parseqatom - parse one quantified atom or constraint of an RE
+ - isinsets - is this character in any sets?
+ == static int isinsets(register struct re_guts *g, int c);
+ */
+static int /* predicate */
+isinsets(g, c)
+register struct re_guts *g;
+int c;
+{
+ register uch *col;
+ register int i;
+ register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ register unsigned uc = (unsigned char)c;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc] != 0)
+ return(1);
+ return(0);
+}
+
+/*
+ - samesets - are these two characters in exactly the same sets?
+ == static int samesets(register struct re_guts *g, int c1, int c2);
+ */
+static int /* predicate */
+samesets(g, c1, c2)
+register struct re_guts *g;
+int c1;
+int c2;
+{
+ register uch *col;
+ register int i;
+ register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
+ register unsigned uc1 = (unsigned char)c1;
+ register unsigned uc2 = (unsigned char)c2;
+
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
+ if (col[uc1] != col[uc2])
+ return(0);
+ return(1);
+}
+
+/*
+ - categorize - sort out character categories
+ == static void categorize(struct parse *p, register struct re_guts *g);
+ */
+static void
+categorize(p, g)
+struct parse *p;
+register struct re_guts *g;
+{
+ register cat_t *cats = g->categories;
+ register int c;
+ register int c2;
+ register cat_t cat;
+
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (cats[c] == 0 && isinsets(g, c)) {
+ cat = g->ncategories++;
+ cats[c] = cat;
+ for (c2 = c+1; c2 <= CHAR_MAX; c2++)
+ if (cats[c2] == 0 && samesets(g, c, c2))
+ cats[c2] = cat;
+ }
+}
+
+/*
+ - dupl - emit a duplicate of a bunch of sops
+ == static sopno dupl(register struct parse *p, sopno start, sopno finish);
+ */
+static sopno /* start of duplicate */
+dupl(p, start, finish)
+register struct parse *p;
+sopno start; /* from here */
+sopno finish; /* to this less one */
+{
+ register sopno ret = HERE();
+ register sopno len = finish - start;
+
+ assert(finish >= start);
+ if (len == 0)
+ return(ret);
+ enlarge(p, p->ssize + len); /* this many unexpected additions */
+ assert(p->ssize >= p->slen + len);
+ (void) memcpy((char *)(p->strip + p->slen),
+ (char *)(p->strip + start), (size_t)len*sizeof(sop));
+ p->slen += len;
+ return(ret);
+}
+
+/*
+ - doemit - emit a strip operator
+ == static void doemit(register struct parse *p, sop op, size_t opnd);
*
- * The bookkeeping near the end cooperates very closely with parsebranch();
- * in particular, it contains a recursion that can involve parsing the rest
- * of the branch, making this function's name somewhat inaccurate.
+ * It might seem better to implement this as a macro with a function as
+ * hard-case backup, but it's just too big and messy unless there are
+ * some changes to the data structures. Maybe later.
*/
static void
-parseqatom(struct vars * v,
- int stopper, /* EOS or ')' */
- int type, /* LACON (lookahead subRE) or PLAIN */
- struct state * lp, /* left state to hang it on */
- struct state * rp, /* right state to hang it on */
- struct subre * top) /* subtree top */
+doemit(p, op, opnd)
+register struct parse *p;
+sop op;
+size_t opnd;
{
- struct state *s; /* temporaries for new states */
- struct state *s2;
-
-#define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
- int m,
- n;
- struct subre *atom; /* atom's subtree */
- struct subre *t;
- int cap; /* capturing parens? */
- int pos; /* positive lookahead? */
- int subno; /* capturing-parens or backref number */
- int atomtype;
- int qprefer; /* quantifier short/long preference */
- int f;
- struct subre **atomp; /* where the pointer to atom is */
-
- /* initial bookkeeping */
- atom = NULL;
- assert(lp->nouts == 0); /* must string new code */
- assert(rp->nins == 0); /* between lp and rp */
- subno = 0; /* just to shut lint up */
-
- /* an atom or constraint... */
- atomtype = v->nexttype;
- switch (atomtype)
- {
- /* first, constraints, which end by returning */
- case '^':
- ARCV('^', 1);
- if (v->cflags & REG_NLANCH)
- ARCV(BEHIND, v->nlcolor);
- NEXT();
- return;
- break;
- case '$':
- ARCV('$', 1);
- if (v->cflags & REG_NLANCH)
- ARCV(AHEAD, v->nlcolor);
- NEXT();
- return;
- break;
- case SBEGIN:
- ARCV('^', 1); /* BOL */
- ARCV('^', 0); /* or BOS */
- NEXT();
- return;
- break;
- case SEND:
- ARCV('$', 1); /* EOL */
- ARCV('$', 0); /* or EOS */
- NEXT();
- return;
- break;
- case '<':
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERR();
- nonword(v, BEHIND, lp, s);
- word(v, AHEAD, s, rp);
- return;
- break;
- case '>':
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERR();
- word(v, BEHIND, lp, s);
- nonword(v, AHEAD, s, rp);
- return;
- break;
- case WBDRY:
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERR();
- nonword(v, BEHIND, lp, s);
- word(v, AHEAD, s, rp);
- s = newstate(v->nfa);
- NOERR();
- word(v, BEHIND, lp, s);
- nonword(v, AHEAD, s, rp);
- return;
- break;
- case NWBDRY:
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERR();
- word(v, BEHIND, lp, s);
- word(v, AHEAD, s, rp);
- s = newstate(v->nfa);
- NOERR();
- nonword(v, BEHIND, lp, s);
- nonword(v, AHEAD, s, rp);
- return;
- break;
- case LACON: /* lookahead constraint */
- pos = v->nextvalue;
- NEXT();
- s = newstate(v->nfa);
- s2 = newstate(v->nfa);
- NOERR();
- t = parse(v, ')', LACON, s, s2);
- freesubre(v, t); /* internal structure irrelevant */
- assert(SEE(')') || ISERR());
- NEXT();
- n = newlacon(v, s, s2, pos);
- NOERR();
- ARCV(LACON, n);
- return;
- break;
- /* then errors, to get them out of the way */
- case '*':
- case '+':
- case '?':
- case '{':
- ERR(REG_BADRPT);
- return;
- break;
- default:
- ERR(REG_ASSERT);
- return;
- break;
- /* then plain characters, and minor variants on that theme */
- case ')': /* unbalanced paren */
- if ((v->cflags & REG_ADVANCED) != REG_EXTENDED)
- {
- ERR(REG_EPAREN);
- return;
- }
- /* legal in EREs due to specification botch */
- NOTE(REG_UPBOTCH);
- /* fallthrough into case PLAIN */
- case PLAIN:
- onechr(v, v->nextvalue, lp, rp);
- okcolors(v->nfa, v->cm);
- NOERR();
- NEXT();
- break;
- case '[':
- if (v->nextvalue == 1)
- bracket(v, lp, rp);
- else
- cbracket(v, lp, rp);
- assert(SEE(']') || ISERR());
- NEXT();
- break;
- case '.':
- rainbow(v->nfa, v->cm, PLAIN,
- (v->cflags & REG_NLSTOP) ? v->nlcolor : COLORLESS,
- lp, rp);
- NEXT();
- break;
- /* and finally the ugly stuff */
- case '(': /* value flags as capturing or non */
- cap = (type == LACON) ? 0 : v->nextvalue;
- if (cap)
- {
- v->nsubexp++;
- subno = v->nsubexp;
- if ((size_t) subno >= v->nsubs)
- moresubs(v, subno);
- assert((size_t) subno < v->nsubs);
- }
- else
- atomtype = PLAIN; /* something that's not '(' */
- NEXT();
- /* need new endpoints because tree will contain pointers */
- s = newstate(v->nfa);
- s2 = newstate(v->nfa);
- NOERR();
- EMPTYARC(lp, s);
- EMPTYARC(s2, rp);
- NOERR();
- atom = parse(v, ')', PLAIN, s, s2);
- assert(SEE(')') || ISERR());
- NEXT();
- NOERR();
- if (cap)
- {
- v->subs[subno] = atom;
- t = subre(v, '(', atom->flags | CAP, lp, rp);
- NOERR();
- t->subno = subno;
- t->left = atom;
- atom = t;
- }
- /* postpone everything else pending possible {0} */
- break;
- case BACKREF: /* the Feature From The Black Lagoon */
- INSIST(type != LACON, REG_ESUBREG);
- INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
- INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
- NOERR();
- assert(v->nextvalue > 0);
- atom = subre(v, 'b', BACKR, lp, rp);
- subno = v->nextvalue;
- atom->subno = subno;
- EMPTYARC(lp, rp); /* temporarily, so there's something */
- NEXT();
- break;
- }
-
- /* ...and an atom may be followed by a quantifier */
- switch (v->nexttype)
- {
- case '*':
- m = 0;
- n = INFINITY;
- qprefer = (v->nextvalue) ? LONGER : SHORTER;
- NEXT();
- break;
- case '+':
- m = 1;
- n = INFINITY;
- qprefer = (v->nextvalue) ? LONGER : SHORTER;
- NEXT();
- break;
- case '?':
- m = 0;
- n = 1;
- qprefer = (v->nextvalue) ? LONGER : SHORTER;
- NEXT();
- break;
- case '{':
- NEXT();
- m = scannum(v);
- if (EAT(','))
- {
- if (SEE(DIGIT))
- n = scannum(v);
- else
- n = INFINITY;
- if (m > n)
- {
- ERR(REG_BADBR);
+ /* avoid making error situations worse */
+ if (p->error != 0)
return;
- }
- /* {m,n} exercises preference, even if it's {m,m} */
- qprefer = (v->nextvalue) ? LONGER : SHORTER;
- }
- else
- {
- n = m;
- /* {m} passes operand's preference through */
- qprefer = 0;
- }
- if (!SEE('}'))
- { /* catches errors too */
- ERR(REG_BADBR);
- return;
- }
- NEXT();
- break;
- default: /* no quantifier */
- m = n = 1;
- qprefer = 0;
- break;
- }
- /* annoying special case: {0} or {0,0} cancels everything */
- if (m == 0 && n == 0)
- {
- if (atom != NULL)
- freesubre(v, atom);
- if (atomtype == '(')
- v->subs[subno] = NULL;
- delsub(v->nfa, lp, rp);
- EMPTYARC(lp, rp);
- return;
- }
+ /* deal with oversize operands ("can't happen", more or less) */
+ assert(opnd < 1<flags));
- f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
- if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f)))
- {
- if (!(m == 1 && n == 1))
- repeat(v, lp, rp, m, n);
- if (atom != NULL)
- freesubre(v, atom);
- top->flags = f;
- return;
- }
+ /* deal with undersized strip */
+ if (p->slen >= p->ssize)
+ enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
+ assert(p->slen < p->ssize);
- /*
- * hard part: something messy That is, capturing parens, back
- * reference, short/long clash, or an atom with substructure
- * containing one of those.
- */
-
- /* now we'll need a subre for the contents even if they're boring */
- if (atom == NULL)
- {
- atom = subre(v, '=', 0, lp, rp);
- NOERR();
- }
-
- /*
- * prepare a general-purpose state skeleton
- *
- * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] / /
- * [lp] ----> [s2] ----bypass---------------------
- *
- * where bypass is an empty, and prefix is some repetitions of atom
- */
- s = newstate(v->nfa); /* first, new endpoints for the atom */
- s2 = newstate(v->nfa);
- NOERR();
- moveouts(v->nfa, lp, s);
- moveins(v->nfa, rp, s2);
- NOERR();
- atom->begin = s;
- atom->end = s2;
- s = newstate(v->nfa); /* and spots for prefix and bypass */
- s2 = newstate(v->nfa);
- NOERR();
- EMPTYARC(lp, s);
- EMPTYARC(lp, s2);
- NOERR();
-
- /* break remaining subRE into x{...} and what follows */
- t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
- t->left = atom;
- atomp = &t->left;
- /* here we should recurse... but we must postpone that to the end */
-
- /* split top into prefix and remaining */
- assert(top->op == '=' && top->left == NULL && top->right == NULL);
- top->left = subre(v, '=', top->flags, top->begin, lp);
- top->op = '.';
- top->right = t;
-
- /* if it's a backref, now is the time to replicate the subNFA */
- if (atomtype == BACKREF)
- {
- assert(atom->begin->nouts == 1); /* just the EMPTY */
- delsub(v->nfa, atom->begin, atom->end);
- assert(v->subs[subno] != NULL);
- /* and here's why the recursion got postponed: it must */
- /* wait until the skeleton is filled in, because it may */
- /* hit a backref that wants to copy the filled-in skeleton */
- dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
- atom->begin, atom->end);
- NOERR();
- }
-
- /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
- if (m == 0)
- {
- EMPTYARC(s2, atom->end); /* the bypass */
- assert(PREF(qprefer) != 0);
- f = COMBINE(qprefer, atom->flags);
- t = subre(v, '|', f, lp, atom->end);
- NOERR();
- t->left = atom;
- t->right = subre(v, '|', PREF(f), s2, atom->end);
- NOERR();
- t->right->left = subre(v, '=', 0, s2, atom->end);
- NOERR();
- *atomp = t;
- atomp = &t->left;
- m = 1;
- }
-
- /* deal with the rest of the quantifier */
- if (atomtype == BACKREF)
- {
- /* special case: backrefs have internal quantifiers */
- EMPTYARC(s, atom->begin); /* empty prefix */
- /* just stuff everything into atom */
- repeat(v, atom->begin, atom->end, m, n);
- atom->min = (short) m;
- atom->max = (short) n;
- atom->flags |= COMBINE(qprefer, atom->flags);
- }
- else if (m == 1 && n == 1)
- {
- /* no/vacuous quantifier: done */
- EMPTYARC(s, atom->begin); /* empty prefix */
- }
- else
- {
- /* turn x{m,n} into x{m-1,n-1}x, with capturing */
- /* parens in only second x */
- dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
- assert(m >= 1 && m != INFINITY && n >= 1);
- repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1);
- f = COMBINE(qprefer, atom->flags);
- t = subre(v, '.', f, s, atom->end); /* prefix and atom */
- NOERR();
- t->left = subre(v, '=', PREF(f), s, atom->begin);
- NOERR();
- t->right = atom;
- *atomp = t;
- }
-
- /* and finally, look after that postponed recursion */
- t = top->right;
- if (!(SEE('|') || SEE(stopper) || SEE(EOS)))
- t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
- else
- {
- EMPTYARC(atom->end, rp);
- t->right = subre(v, '=', 0, atom->end, rp);
- }
- assert(SEE('|') || SEE(stopper) || SEE(EOS));
- t->flags |= COMBINE(t->flags, t->right->flags);
- top->flags |= COMBINE(top->flags, t->flags);
+ /* finally, it's all reduced to the easy case */
+ p->strip[p->slen++] = SOP(op, opnd);
}
/*
- * nonword - generate arcs for non-word-character ahead or behind
+ - doinsert - insert a sop into the strip
+ == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
*/
static void
-nonword(struct vars * v,
- int dir, /* AHEAD or BEHIND */
- struct state * lp,
- struct state * rp)
+doinsert(p, op, opnd, pos)
+register struct parse *p;
+sop op;
+size_t opnd;
+sopno pos;
{
- int anchor = (dir == AHEAD) ? '$' : '^';
+ register sopno sn;
+ register sop s;
+ register int i;
- assert(dir == AHEAD || dir == BEHIND);
- newarc(v->nfa, anchor, 1, lp, rp);
- newarc(v->nfa, anchor, 0, lp, rp);
- colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
- /* (no need for special attention to \n) */
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ sn = HERE();
+ EMIT(op, opnd); /* do checks, ensure space */
+ assert(HERE() == sn+1);
+ s = p->strip[sn];
+
+ /* adjust paren pointers */
+ assert(pos > 0);
+ for (i = 1; i < NPAREN; i++) {
+ if (p->pbegin[i] >= pos) {
+ p->pbegin[i]++;
+ }
+ if (p->pend[i] >= pos) {
+ p->pend[i]++;
+ }
+ }
+
+ memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
+ (HERE()-pos-1)*sizeof(sop));
+ p->strip[pos] = s;
}
/*
- * word - generate arcs for word character ahead or behind
+ - dofwd - complete a forward reference
+ == static void dofwd(register struct parse *p, sopno pos, sop value);
*/
static void
-word(struct vars * v,
- int dir, /* AHEAD or BEHIND */
- struct state * lp,
- struct state * rp)
+dofwd(p, pos, value)
+register struct parse *p;
+register sopno pos;
+sop value;
{
- assert(dir == AHEAD || dir == BEHIND);
- cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
- /* (no need for special attention to \n) */
+ /* avoid making error situations worse */
+ if (p->error != 0)
+ return;
+
+ assert(value < 1<strip[pos] = OP(p->strip[pos]) | value;
}
/*
- * scannum - scan a number
+ - enlarge - enlarge the strip
+ == static void enlarge(register struct parse *p, sopno size);
*/
-static int /* value, <= DUPMAX */
-scannum(struct vars * v)
+static void
+enlarge(p, size)
+register struct parse *p;
+register sopno size;
{
- int n = 0;
+ register sop *sp;
- while (SEE(DIGIT) && n < DUPMAX)
- {
- n = n * 10 + v->nextvalue;
- NEXT();
+ if (p->ssize >= size)
+ return;
+
+ sp = (sop *)realloc(p->strip, size*sizeof(sop));
+ if (sp == NULL) {
+ SETERROR(REG_ESPACE);
+ return;
}
- if (SEE(DIGIT) || n > DUPMAX)
- {
- ERR(REG_BADBR);
- return 0;
- }
- return n;
+ p->strip = sp;
+ p->ssize = size;
}
/*
- * repeat - replicate subNFA for quantifiers
+ - stripsnug - compact the strip
+ == static void stripsnug(register struct parse *p, register struct re_guts *g);
+ */
+static void
+stripsnug(p, g)
+register struct parse *p;
+register struct re_guts *g;
+{
+ g->nstates = p->slen;
+ g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+ if (g->strip == NULL) {
+ SETERROR(REG_ESPACE);
+ g->strip = p->strip;
+ }
+}
+
+/*
+ - findmust - fill in must and mlen with longest mandatory literal string
+ == static void findmust(register struct parse *p, register struct re_guts *g);
*
- * The duplication sequences used here are chosen carefully so that any
- * pointers starting out pointing into the subexpression end up pointing into
- * the last occurrence. (Note that it may not be strung between the same
- * left and right end states, however!) This used to be important for the
- * subRE tree, although the important bits are now handled by the in-line
- * code in parse(), and when this is called, it doesn't matter any more.
- */
-static void
-repeat(struct vars * v,
- struct state * lp,
- struct state * rp,
- int m,
- int n)
-{
-#define SOME 2
-#define INF 3
-#define PAIR(x, y) ((x)*4 + (y))
-#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
- const int rm = REDUCE(m);
- const int rn = REDUCE(n);
- struct state *s;
- struct state *s2;
-
- switch (PAIR(rm, rn))
- {
- case PAIR(0, 0): /* empty string */
- delsub(v->nfa, lp, rp);
- EMPTYARC(lp, rp);
- break;
- case PAIR(0, 1): /* do as x| */
- EMPTYARC(lp, rp);
- break;
- case PAIR(0, SOME): /* do as x{1,n}| */
- repeat(v, lp, rp, 1, n);
- NOERR();
- EMPTYARC(lp, rp);
- break;
- case PAIR(0, INF): /* loop x around */
- s = newstate(v->nfa);
- NOERR();
- moveouts(v->nfa, lp, s);
- moveins(v->nfa, rp, s);
- EMPTYARC(lp, s);
- EMPTYARC(s, rp);
- break;
- case PAIR(1, 1): /* no action required */
- break;
- case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */
- s = newstate(v->nfa);
- NOERR();
- moveouts(v->nfa, lp, s);
- dupnfa(v->nfa, s, rp, lp, s);
- NOERR();
- repeat(v, lp, s, 1, n - 1);
- NOERR();
- EMPTYARC(lp, s);
- break;
- case PAIR(1, INF): /* add loopback arc */
- s = newstate(v->nfa);
- s2 = newstate(v->nfa);
- NOERR();
- moveouts(v->nfa, lp, s);
- moveins(v->nfa, rp, s2);
- EMPTYARC(lp, s);
- EMPTYARC(s2, rp);
- EMPTYARC(s2, s);
- break;
- case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */
- s = newstate(v->nfa);
- NOERR();
- moveouts(v->nfa, lp, s);
- dupnfa(v->nfa, s, rp, lp, s);
- NOERR();
- repeat(v, lp, s, m - 1, n - 1);
- break;
- case PAIR(SOME, INF): /* do as x{m-1,}x */
- s = newstate(v->nfa);
- NOERR();
- moveouts(v->nfa, lp, s);
- dupnfa(v->nfa, s, rp, lp, s);
- NOERR();
- repeat(v, lp, s, m - 1, n);
- break;
- default:
- ERR(REG_ASSERT);
- break;
- }
-}
-
-/*
- * bracket - handle non-complemented bracket expression
- * Also called from cbracket for complemented bracket expressions.
- */
-static void
-bracket(struct vars * v,
- struct state * lp,
- struct state * rp)
-{
- assert(SEE('['));
- NEXT();
- while (!SEE(']') && !SEE(EOS))
- brackpart(v, lp, rp);
- assert(SEE(']') || ISERR());
- okcolors(v->nfa, v->cm);
-}
-
-/*
- * cbracket - handle complemented bracket expression
- * We do it by calling bracket() with dummy endpoints, and then complementing
- * the result. The alternative would be to invoke rainbow(), and then delete
- * arcs as the b.e. is seen... but that gets messy.
- */
-static void
-cbracket(struct vars * v,
- struct state * lp,
- struct state * rp)
-{
- struct state *left = newstate(v->nfa);
- struct state *right = newstate(v->nfa);
- struct state *s;
- struct arc *a; /* arc from lp */
- struct arc *ba; /* arc from left, from bracket() */
- struct arc *pa; /* MCCE-prototype arc */
- color co;
- chr *p;
- int i;
-
- NOERR();
- bracket(v, left, right);
- if (v->cflags & REG_NLSTOP)
- newarc(v->nfa, PLAIN, v->nlcolor, left, right);
- NOERR();
-
- assert(lp->nouts == 0); /* all outarcs will be ours */
-
- /* easy part of complementing */
- colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
- NOERR();
- if (v->mcces == NULL)
- { /* no MCCEs -- we're done */
- dropstate(v->nfa, left);
- assert(right->nins == 0);
- freestate(v->nfa, right);
- return;
- }
-
- /* but complementing gets messy in the presence of MCCEs... */
- NOTE(REG_ULOCALE);
- for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--)
- {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- ba = findarc(left, PLAIN, co);
- if (ba == NULL)
- {
- assert(a != NULL);
- freearc(v->nfa, a);
- }
- else
- assert(a == NULL);
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- if (ba == NULL)
- { /* easy case, need all of them */
- cloneouts(v->nfa, pa->to, s, rp, PLAIN);
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
- }
- else
- { /* must be selective */
- if (findarc(ba->to, '$', 1) == NULL)
- {
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to,
- s, rp);
- }
- for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)
- if (findarc(ba->to, PLAIN, pa->co) == NULL)
- newarc(v->nfa, PLAIN, pa->co, s, rp);
- if (s->nouts == 0) /* limit of selectivity: none */
- dropstate(v->nfa, s); /* frees arc too */
- }
- NOERR();
- }
-
- delsub(v->nfa, left, right);
- assert(left->nouts == 0);
- freestate(v->nfa, left);
- assert(right->nins == 0);
- freestate(v->nfa, right);
-}
-
-/*
- * brackpart - handle one item (or range) within a bracket expression
- */
-static void
-brackpart(struct vars * v,
- struct state * lp,
- struct state * rp)
-{
- celt startc;
- celt endc;
- struct cvec *cv;
- chr *startp;
- chr *endp;
- chr c[1];
-
- /* parse something, get rid of special cases, take shortcuts */
- switch (v->nexttype)
- {
- case RANGE: /* a-b-c or other botch */
- ERR(REG_ERANGE);
- return;
- break;
- case PLAIN:
- c[0] = v->nextvalue;
- NEXT();
- /* shortcut for ordinary chr (not range, not MCCE leader) */
- if (!SEE(RANGE) && !ISCELEADER(v, c[0]))
- {
- onechr(v, c[0], lp, rp);
- return;
- }
- startc = element(v, c, c + 1);
- NOERR();
- break;
- case COLLEL:
- startp = v->now;
- endp = scanplain(v);
- INSIST(startp < endp, REG_ECOLLATE);
- NOERR();
- startc = element(v, startp, endp);
- NOERR();
- break;
- case ECLASS:
- startp = v->now;
- endp = scanplain(v);
- INSIST(startp < endp, REG_ECOLLATE);
- NOERR();
- startc = element(v, startp, endp);
- NOERR();
- cv = eclass(v, startc, (v->cflags & REG_ICASE));
- NOERR();
- dovec(v, cv, lp, rp);
- return;
- break;
- case CCLASS:
- startp = v->now;
- endp = scanplain(v);
- INSIST(startp < endp, REG_ECTYPE);
- NOERR();
- cv = cclass(v, startp, endp, (v->cflags & REG_ICASE));
- NOERR();
- dovec(v, cv, lp, rp);
- return;
- break;
- default:
- ERR(REG_ASSERT);
- return;
- break;
- }
-
- if (SEE(RANGE))
- {
- NEXT();
- switch (v->nexttype)
- {
- case PLAIN:
- case RANGE:
- c[0] = v->nextvalue;
- NEXT();
- endc = element(v, c, c + 1);
- NOERR();
- break;
- case COLLEL:
- startp = v->now;
- endp = scanplain(v);
- INSIST(startp < endp, REG_ECOLLATE);
- NOERR();
- endc = element(v, startp, endp);
- NOERR();
- break;
- default:
- ERR(REG_ERANGE);
- return;
- break;
- }
- }
- else
- endc = startc;
-
- /*
- * Ranges are unportable. Actually, standard C does guarantee that
- * digits are contiguous, but making that an exception is just too
- * complicated.
- */
- if (startc != endc)
- NOTE(REG_UUNPORT);
- cv = range(v, startc, endc, (v->cflags & REG_ICASE));
- NOERR();
- dovec(v, cv, lp, rp);
-}
-
-/*
- * scanplain - scan PLAIN contents of [. etc.
+ * This algorithm could do fancy things like analyzing the operands of |
+ * for common subsequences. Someday. This code is simple and finds most
+ * of the interesting cases.
*
- * Certain bits of trickery in lex.c know that this code does not try
- * to look past the final bracket of the [. etc.
- */
-static chr * /* just after end of sequence */
-scanplain(struct vars * v)
-{
- chr *endp;
-
- assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
- NEXT();
-
- endp = v->now;
- while (SEE(PLAIN))
- {
- endp = v->now;
- NEXT();
- }
-
- assert(SEE(END) || ISERR());
- NEXT();
-
- return endp;
-}
-
-/*
- * leaders - process a cvec of collating elements to also include leaders
- * Also gives all characters involved their own colors, which is almost
- * certainly necessary, and sets up little disconnected subNFA.
+ * Note that must and mlen got initialized during setup.
*/
static void
-leaders(struct vars * v,
- struct cvec * cv)
+findmust(p, g)
+struct parse *p;
+register struct re_guts *g;
{
- int mcce;
- chr *p;
- chr leader;
- struct state *s;
- struct arc *a;
+ register sop *scan;
+ sop *start;
+ register sop *newstart;
+ register sopno newlen;
+ register sop s;
+ register char *cp;
+ register sopno i;
- v->mccepbegin = newstate(v->nfa);
- v->mccepend = newstate(v->nfa);
- NOERR();
-
- for (mcce = 0; mcce < cv->nmcces; mcce++)
- {
- p = cv->mcces[mcce];
- leader = *p;
- if (!haschr(cv, leader))
- {
- addchr(cv, leader);
- s = newstate(v->nfa);
- newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
- v->mccepbegin, s);
- okcolors(v->nfa, v->cm);
- }
- else
- {
- a = findarc(v->mccepbegin, PLAIN,
- GETCOLOR(v->cm, leader));
- assert(a != NULL);
- s = a->to;
- assert(s != v->mccepend);
- }
- p++;
- assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for
- * now */
- newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
- okcolors(v->nfa, v->cm);
- }
-}
-
-/*
- * onechr - fill in arcs for a plain character, and possible case complements
- * This is mostly a shortcut for efficient handling of the common case.
- */
-static void
-onechr(struct vars * v,
- chr c,
- struct state * lp,
- struct state * rp)
-{
- if (!(v->cflags & REG_ICASE))
- {
- newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
- return;
- }
-
- /* rats, need general case anyway... */
- dovec(v, allcases(v, c), lp, rp);
-}
-
-/*
- * dovec - fill in arcs for each element of a cvec
- * This one has to handle the messy cases, like MCCEs and MCCE leaders.
- */
-static void
-dovec(struct vars * v,
- struct cvec * cv,
- struct state * lp,
- struct state * rp)
-{
- chr ch,
- from,
- to;
- celt ce;
- chr *p;
- int i;
- color co;
- struct cvec *leads;
- struct arc *a;
- struct arc *pa; /* arc in prototype */
- struct state *s;
- struct state *ps; /* state in prototype */
-
- /* need a place to store leaders, if any */
- if (nmcces(v) > 0)
- {
- assert(v->mcces != NULL);
- if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs)
- {
- if (v->cv2 != NULL)
- free(v->cv2);
- v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
- NOERR();
- leads = v->cv2;
- }
- else
- leads = clearcvec(v->cv2);
- }
- else
- leads = NULL;
-
- /* first, get the ordinary characters out of the way */
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
- {
- ch = *p;
- if (!ISCELEADER(v, ch))
- newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
- else
- {
- assert(singleton(v->cm, ch));
- assert(leads != NULL);
- if (!haschr(leads, ch))
- addchr(leads, ch);
- }
- }
-
- /* and the ranges */
- for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
- {
- from = *p;
- to = *(p + 1);
- while (from <= to && (ce = nextleader(v, from, to)) != NOCELT)
- {
- if (from < ce)
- subrange(v, from, ce - 1, lp, rp);
- assert(singleton(v->cm, ce));
- assert(leads != NULL);
- if (!haschr(leads, ce))
- addchr(leads, ce);
- from = ce + 1;
- }
- if (from <= to)
- subrange(v, from, to, lp, rp);
- }
-
- if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
+ /* avoid making error situations worse */
+ if (p->error != 0)
return;
- /* deal with the MCCE leaders */
- NOTE(REG_ULOCALE);
- for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--)
- {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- if (a != NULL)
- s = a->to;
- else
- {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
+ /* find the longest OCHAR sequence in strip */
+ newlen = 0;
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OCHAR: /* sequence member */
+ if (newlen == 0) /* new sequence */
+ newstart = scan - 1;
+ newlen++;
+ break;
+ case OPLUS_: /* things that don't break one */
+ case OLPAREN:
+ case ORPAREN:
+ break;
+ case OQUEST_: /* things that must be skipped */
+ case OCH_:
+ scan--;
+ do {
+ scan += OPND(s);
+ s = *scan;
+ /* assert() interferes w debug printouts */
+ if (OP(s) != O_QUEST && OP(s) != O_CH &&
+ OP(s) != OOR2) {
+ g->iflags |= BAD;
+ return;
+ }
+ } while (OP(s) != O_QUEST && OP(s) != O_CH);
+ /* fallthrough */
+ default: /* things that break a sequence */
+ if (newlen > g->mlen) { /* ends one */
+ start = newstart;
+ g->mlen = newlen;
+ }
+ newlen = 0;
+ break;
}
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- ps = pa->to;
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
- NOERR();
- }
+ } while (OP(s) != OEND);
- /* and the MCCEs */
- for (i = 0; i < cv->nmcces; i++)
- {
- p = cv->mcces[i];
- assert(singleton(v->cm, *p));
- if (!singleton(v->cm, *p))
- {
- ERR(REG_ASSERT);
- return;
- }
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- a = findarc(lp, PLAIN, co);
- if (a != NULL)
- s = a->to;
- else
- {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- }
- assert(*p != 0); /* at least two chars */
- assert(singleton(v->cm, *p));
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- assert(*p == 0); /* and only two, for now */
- newarc(v->nfa, PLAIN, co, s, rp);
- NOERR();
- }
-}
-
-/*
- * nextleader - find next MCCE leader within range
- */
-static celt /* NOCELT means none */
-nextleader(struct vars * v,
- chr from,
- chr to)
-{
- int i;
- chr *p;
- chr ch;
- celt it = NOCELT;
-
- if (v->mcces == NULL)
- return it;
-
- for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++)
- {
- ch = *p;
- if (from <= ch && ch <= to)
- if (it == NOCELT || ch < it)
- it = ch;
- }
- return it;
-}
-
-/*
- * wordchrs - set up word-chr list for word-boundary stuff, if needed
- *
- * The list is kept as a bunch of arcs between two dummy states; it's
- * disposed of by the unreachable-states sweep in NFA optimization.
- * Does NEXT(). Must not be called from any unusual lexical context.
- * This should be reconciled with the \w etc. handling in lex.c, and
- * should be cleaned up to reduce dependencies on input scanning.
- */
-static void
-wordchrs(struct vars * v)
-{
- struct state *left;
- struct state *right;
-
- if (v->wordchrs != NULL)
- {
- NEXT(); /* for consistency */
- return;
- }
-
- left = newstate(v->nfa);
- right = newstate(v->nfa);
- NOERR();
- /* fine point: implemented with [::], and lexer will set REG_ULOCALE */
- lexword(v);
- NEXT();
- assert(v->savenow != NULL && SEE('['));
- bracket(v, left, right);
- assert((v->savenow != NULL && SEE(']')) || ISERR());
- NEXT();
- NOERR();
- v->wordchrs = left;
-}
-
-/*
- * subre - allocate a subre
- */
-static struct subre *
-subre(struct vars * v,
- int op,
- int flags,
- struct state * begin,
- struct state * end)
-{
- struct subre *ret;
-
- ret = v->treefree;
- if (ret != NULL)
- v->treefree = ret->left;
- else
- {
- ret = (struct subre *) MALLOC(sizeof(struct subre));
- if (ret == NULL)
- {
- ERR(REG_ESPACE);
- return NULL;
- }
- ret->chain = v->treechain;
- v->treechain = ret;
- }
-
- assert(strchr("|.b(=", op) != NULL);
-
- ret->op = op;
- ret->flags = flags;
- ret->retry = 0;
- ret->subno = 0;
- ret->min = ret->max = 1;
- ret->left = NULL;
- ret->right = NULL;
- ret->begin = begin;
- ret->end = end;
- ZAPCNFA(ret->cnfa);
-
- return ret;
-}
-
-/*
- * freesubre - free a subRE subtree
- */
-static void
-freesubre(struct vars * v, /* might be NULL */
- struct subre * sr)
-{
- if (sr == NULL)
+ if (g->mlen == 0) /* there isn't one */
return;
- if (sr->left != NULL)
- freesubre(v, sr->left);
- if (sr->right != NULL)
- freesubre(v, sr->right);
-
- freesrnode(v, sr);
-}
-
-/*
- * freesrnode - free one node in a subRE subtree
- */
-static void
-freesrnode(struct vars * v, /* might be NULL */
- struct subre * sr)
-{
- if (sr == NULL)
+ /* turn it into a character string */
+ g->must = malloc((size_t)g->mlen + 1);
+ if (g->must == NULL) { /* argh; just forget it */
+ g->mlen = 0;
return;
-
- if (!NULLCNFA(sr->cnfa))
- freecnfa(&sr->cnfa);
- sr->flags = 0;
-
- if (v != NULL)
- {
- sr->left = v->treefree;
- v->treefree = sr;
}
- else
- FREE(sr);
+ cp = g->must;
+ scan = start;
+ for (i = g->mlen; i > 0; i--) {
+ while (OP(s = *scan++) != OCHAR)
+ continue;
+ assert(cp < g->must + g->mlen);
+ *cp++ = (char)OPND(s);
+ }
+ assert(cp == g->must + g->mlen);
+ *cp++ = '\0'; /* just on general principles */
}
/*
- * optst - optimize a subRE subtree
+ - pluscount - count + nesting
+ == static sopno pluscount(register struct parse *p, register struct re_guts *g);
*/
-static void
-optst(struct vars * v,
- struct subre * t)
+static sopno /* nesting depth */
+pluscount(p, g)
+struct parse *p;
+register struct re_guts *g;
{
- if (t == NULL)
- return;
+ register sop *scan;
+ register sop s;
+ register sopno plusnest = 0;
+ register sopno maxnest = 0;
- /* recurse through children */
- if (t->left != NULL)
- optst(v, t->left);
- if (t->right != NULL)
- optst(v, t->right);
-}
+ if (p->error != 0)
+ return(0); /* there may not be an OEND */
-/*
- * numst - number tree nodes (assigning retry indexes)
- */
-static int /* next number */
-numst(struct subre * t,
- int start) /* starting point for subtree numbers */
-{
- int i;
-
- assert(t != NULL);
-
- i = start;
- t->retry = (short) i++;
- if (t->left != NULL)
- i = numst(t->left, i);
- if (t->right != NULL)
- i = numst(t->right, i);
- return i;
-}
-
-/*
- * markst - mark tree nodes as INUSE
- */
-static void
-markst(struct subre * t)
-{
- assert(t != NULL);
-
- t->flags |= INUSE;
- if (t->left != NULL)
- markst(t->left);
- if (t->right != NULL)
- markst(t->right);
-}
-
-/*
- * cleanst - free any tree nodes not marked INUSE
- */
-static void
-cleanst(struct vars * v)
-{
- struct subre *t;
- struct subre *next;
-
- for (t = v->treechain; t != NULL; t = next)
- {
- next = t->chain;
- if (!(t->flags & INUSE))
- FREE(t);
+ scan = g->strip + 1;
+ do {
+ s = *scan++;
+ switch (OP(s)) {
+ case OPLUS_:
+ plusnest++;
+ break;
+ case O_PLUS:
+ if (plusnest > maxnest)
+ maxnest = plusnest;
+ plusnest--;
+ break;
}
- v->treechain = NULL;
- v->treefree = NULL; /* just on general principles */
+ } while (OP(s) != OEND);
+ if (plusnest != 0)
+ g->iflags |= BAD;
+ return(maxnest);
}
-
-/*
- * nfatree - turn a subRE subtree into a tree of compacted NFAs
- */
-static long /* optimize results from top node */
-nfatree(struct vars * v,
- struct subre * t,
- FILE *f) /* for debug output */
-{
- assert(t != NULL && t->begin != NULL);
-
- if (t->left != NULL)
- (DISCARD) nfatree(v, t->left, f);
- if (t->right != NULL)
- (DISCARD) nfatree(v, t->right, f);
-
- return nfanode(v, t, f);
-}
-
-/*
- * nfanode - do one NFA for nfatree
- */
-static long /* optimize results */
-nfanode(struct vars * v,
- struct subre * t,
- FILE *f) /* for debug output */
-{
- struct nfa *nfa;
- long ret = 0;
-
- assert(t->begin != NULL);
-
-#ifdef REG_DEBUG
- if (f != NULL)
- {
- char idbuf[50];
-
- fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
- stid(t, idbuf, sizeof(idbuf)));
- }
-#endif
- nfa = newnfa(v, v->cm, v->nfa);
- NOERRZ();
- dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
- if (!ISERR())
- {
- specialcolors(nfa);
- ret = optimize(nfa, f);
- }
- if (!ISERR())
- compact(nfa, &t->cnfa);
-
- freenfa(nfa);
- return ret;
-}
-
-/*
- * newlacon - allocate a lookahead-constraint subRE
- */
-static int /* lacon number */
-newlacon(struct vars * v,
- struct state * begin,
- struct state * end,
- int pos)
-{
- int n;
- struct subre *sub;
-
- if (v->nlacons == 0)
- {
- v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre));
- n = 1; /* skip 0th */
- v->nlacons = 2;
- }
- else
- {
- v->lacons = (struct subre *) REALLOC(v->lacons,
- (v->nlacons + 1) * sizeof(struct subre));
- n = v->nlacons++;
- }
- if (v->lacons == NULL)
- {
- ERR(REG_ESPACE);
- return 0;
- }
- sub = &v->lacons[n];
- sub->begin = begin;
- sub->end = end;
- sub->subno = pos;
- ZAPCNFA(sub->cnfa);
- return n;
-}
-
-/*
- * freelacons - free lookahead-constraint subRE vector
- */
-static void
-freelacons(struct subre * subs,
- int n)
-{
- struct subre *sub;
- int i;
-
- assert(n > 0);
- for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */
- if (!NULLCNFA(sub->cnfa))
- freecnfa(&sub->cnfa);
- FREE(subs);
-}
-
-/*
- * rfree - free a whole RE (insides of regfree)
- */
-static void
-rfree(regex_t *re)
-{
- struct guts *g;
-
- if (re == NULL || re->re_magic != REMAGIC)
- return;
-
- re->re_magic = 0; /* invalidate RE */
- g = (struct guts *) re->re_guts;
- re->re_guts = NULL;
- re->re_fns = NULL;
- g->magic = 0;
- freecm(&g->cmap);
- if (g->tree != NULL)
- freesubre((struct vars *) NULL, g->tree);
- if (g->lacons != NULL)
- freelacons(g->lacons, g->nlacons);
- if (!NULLCNFA(g->search))
- freecnfa(&g->search);
- FREE(g);
-}
-
-#ifdef REG_DEBUG
-
-/*
- * dump - dump an RE in human-readable form
- */
-static void
-dump(regex_t *re,
- FILE *f)
-{
- struct guts *g;
- int i;
-
- if (re->re_magic != REMAGIC)
- fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,
- REMAGIC);
- if (re->re_guts == NULL)
- {
- fprintf(f, "NULL guts!!!\n");
- return;
- }
- g = (struct guts *) re->re_guts;
- if (g->magic != GUTSMAGIC)
- fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,
- GUTSMAGIC);
-
- fprintf(f, "\n\n\n========= DUMP ==========\n");
- fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
- re->re_nsub, re->re_info, re->re_csize, g->ntree);
-
- dumpcolors(&g->cmap, f);
- if (!NULLCNFA(g->search))
- {
- printf("\nsearch:\n");
- dumpcnfa(&g->search, f);
- }
- for (i = 1; i < g->nlacons; i++)
- {
- fprintf(f, "\nla%d (%s):\n", i,
- (g->lacons[i].subno) ? "positive" : "negative");
- dumpcnfa(&g->lacons[i].cnfa, f);
- }
- fprintf(f, "\n");
- dumpst(g->tree, f, 0);
-}
-
-/*
- * dumpst - dump a subRE tree
- */
-static void
-dumpst(struct subre * t,
- FILE *f,
- int nfapresent) /* is the original NFA still around? */
-{
- if (t == NULL)
- fprintf(f, "null tree\n");
- else
- stdump(t, f, nfapresent);
- fflush(f);
-}
-
-/*
- * stdump - recursive guts of dumpst
- */
-static void
-stdump(struct subre * t,
- FILE *f,
- int nfapresent) /* is the original NFA still around? */
-{
- char idbuf[50];
-
- fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
- if (t->flags & LONGER)
- fprintf(f, " longest");
- if (t->flags & SHORTER)
- fprintf(f, " shortest");
- if (t->flags & MIXED)
- fprintf(f, " hasmixed");
- if (t->flags & CAP)
- fprintf(f, " hascapture");
- if (t->flags & BACKR)
- fprintf(f, " hasbackref");
- if (!(t->flags & INUSE))
- fprintf(f, " UNUSED");
- if (t->subno != 0)
- fprintf(f, " (#%d)", t->subno);
- if (t->min != 1 || t->max != 1)
- {
- fprintf(f, " {%d,", t->min);
- if (t->max != INFINITY)
- fprintf(f, "%d", t->max);
- fprintf(f, "}");
- }
- if (nfapresent)
- fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no);
- if (t->left != NULL)
- fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
- if (t->right != NULL)
- fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
- if (!NULLCNFA(t->cnfa))
- {
- fprintf(f, "\n");
- dumpcnfa(&t->cnfa, f);
- fprintf(f, "\n");
- }
- if (t->left != NULL)
- stdump(t->left, f, nfapresent);
- if (t->right != NULL)
- stdump(t->right, f, nfapresent);
-}
-
-/*
- * stid - identify a subtree node for dumping
- */
-static char * /* points to buf or constant string */
-stid(struct subre * t,
- char *buf,
- size_t bufsize)
-{
- /* big enough for hex int or decimal t->retry? */
- if (bufsize < sizeof(int) * 2 + 3 || bufsize < sizeof(t->retry) * 3 + 1)
- return "unable";
- if (t->retry != 0)
- sprintf(buf, "%d", t->retry);
- else
- sprintf(buf, "0x%x", (int) t); /* may lose bits, that's okay */
- return buf;
-}
-#endif /* REG_DEBUG */
-
-
-#include "regc_lex.c"
-#include "regc_color.c"
-#include "regc_nfa.c"
-#include "regc_cvec.c"
-#include "regc_locale.c"
diff --git a/src/regex/regcustom.h b/src/regex/regcustom.h
deleted file mode 100644
index 15ea15f495..0000000000
--- a/src/regex/regcustom.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Id$
- */
-
-/* headers if any */
-#include
-#include
-#include
-#include
-#include
-#ifndef wxCHECK_GCC_VERSION
-#define wxCHECK_GCC_VERSION( major, minor ) \
- ( defined(__GNUC__) && defined(__GNUC_MINOR__) \
- && ( ( __GNUC__ > (major) ) \
- || ( __GNUC__ == (major) && __GNUC_MINOR__ >= (minor) ) ) )
-#endif
-
-#ifdef wxUSE_NEW_REGEX
-
- #if !wxUSE_UNICODE
- # define wx_wchar char
- #else // Unicode
- #if (defined(__GNUC__) && !wxCHECK_GCC_VERSION(2, 96))
- # define wx_wchar __WCHAR_TYPE__
- #else // __WCHAR_TYPE__ and gcc < 2.96
- // standard case
- # define wx_wchar wchar_t
- #endif // __WCHAR_TYPE__
- #endif // ASCII/Unicode
-
-#else
-
- #define wx_wchar char
-
-#endif
-
-
-/* overrides for regguts.h definitions, if any */
-#define FUNCPTR(name, args) (*name) args
-#define MALLOC(n) malloc(n)
-#define FREE(p) free(VS(p))
-#define REALLOC(p,n) realloc(VS(p),n)
-
-/* internal character type and related */
-typedef wx_wchar chr; /* the type itself */
-typedef unsigned uchr; /* unsigned type that will hold a chr */
-typedef int celt; /* type to hold chr, MCCE number, or
- * NOCELT */
-
-#define NOCELT (-1) /* celt value which is not valid chr or
- * MCCE */
-#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr
- * literal */
-#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
-#define CHRBITS 32 /* bits in a chr; must not use sizeof */
-#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */
-#define CHR_MAX 0xfffffffe /* CHR_MAX-CHR_MIN+1 should fit in uchr */
-
-/* functions operating on chr */
-#define iscalnum(x) wx_isalnum(x)
-#define iscalpha(x) wx_isalpha(x)
-#define iscdigit(x) wx_isdigit(x)
-#define iscspace(x) wx_isspace(x)
-
-/* and pick up the standard header */
-#include "regex.h"
diff --git a/src/regex/rege_dfa.c b/src/regex/rege_dfa.c
deleted file mode 100644
index 5347b90d73..0000000000
--- a/src/regex/rege_dfa.c
+++ /dev/null
@@ -1,699 +0,0 @@
-/*
- * DFA routines
- * This file is #included by regexec.c.
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header$
- *
- */
-
-/*
- * longest - longest-preferred matching engine
- */
-static chr * /* endpoint, or NULL */
-longest(struct vars * v, /* used only for debug and exec flags */
- struct dfa * d,
- chr *start, /* where the match should start */
- chr *stop, /* match must end at or before here */
- int *hitstopp) /* record whether hit v->stop, if non-NULL */
-{
- chr *cp;
- chr *realstop = (stop == v->stop) ? stop : stop + 1;
- color co;
- struct sset *css;
- struct sset *ss;
- chr *post;
- int i;
- struct colormap *cm = d->cm;
-
- /* initialize */
- css = initialize(v, d, start);
- cp = start;
- if (hitstopp != NULL)
- *hitstopp = 0;
-
- /* startup */
- FDEBUG(("+++ startup +++\n"));
- if (cp == v->start)
- {
- co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
- FDEBUG(("color %ld\n", (long) co));
- }
- else
- {
- co = GETCOLOR(cm, *(cp - 1));
- FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
- }
- css = miss(v, d, css, co, cp, start);
- if (css == NULL)
- return NULL;
- css->lastseen = cp;
-
- /* main loop */
- if (v->eflags & REG_FTRACE)
- while (cp < realstop)
- {
- FDEBUG(("+++ at c%d +++\n", css - d->ssets));
- co = GETCOLOR(cm, *cp);
- FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
- ss = css->outs[co];
- if (ss == NULL)
- {
- ss = miss(v, d, css, co, cp + 1, start);
- if (ss == NULL)
- break; /* NOTE BREAK OUT */
- }
- cp++;
- ss->lastseen = cp;
- css = ss;
- }
- else
- while (cp < realstop)
- {
- co = GETCOLOR(cm, *cp);
- ss = css->outs[co];
- if (ss == NULL)
- {
- ss = miss(v, d, css, co, cp + 1, start);
- if (ss == NULL)
- break; /* NOTE BREAK OUT */
- }
- cp++;
- ss->lastseen = cp;
- css = ss;
- }
-
- /* shutdown */
- FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
- if (cp == v->stop && stop == v->stop)
- {
- if (hitstopp != NULL)
- *hitstopp = 1;
- co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
- FDEBUG(("color %ld\n", (long) co));
- ss = miss(v, d, css, co, cp, start);
- /* special case: match ended at eol? */
- if (ss != NULL && (ss->flags & POSTSTATE))
- return cp;
- else if (ss != NULL)
- ss->lastseen = cp; /* to be tidy */
- }
-
- /* find last match, if any */
- post = d->lastpost;
- for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
- if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
- (post == NULL || post < ss->lastseen))
- post = ss->lastseen;
- if (post != NULL) /* found one */
- return post - 1;
-
- return NULL;
-}
-
-/*
- * shortest - shortest-preferred matching engine
- */
-static chr * /* endpoint, or NULL */
-shortest(struct vars * v,
- struct dfa * d,
- chr *start, /* where the match should start */
- chr *min, /* match must end at or after here */
- chr *max, /* match must end at or before here */
- chr **coldp, /* store coldstart pointer here, if
- * nonNULL */
- int *hitstopp) /* record whether hit v->stop, if non-NULL */
-{
- chr *cp;
- chr *realmin = (min == v->stop) ? min : min + 1;
- chr *realmax = (max == v->stop) ? max : max + 1;
- color co;
- struct sset *css;
- struct sset *ss;
- struct colormap *cm = d->cm;
-
- /* initialize */
- css = initialize(v, d, start);
- cp = start;
- if (hitstopp != NULL)
- *hitstopp = 0;
-
- /* startup */
- FDEBUG(("--- startup ---\n"));
- if (cp == v->start)
- {
- co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
- FDEBUG(("color %ld\n", (long) co));
- }
- else
- {
- co = GETCOLOR(cm, *(cp - 1));
- FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
- }
- css = miss(v, d, css, co, cp, start);
- if (css == NULL)
- return NULL;
- css->lastseen = cp;
- ss = css;
-
- /* main loop */
- if (v->eflags & REG_FTRACE)
- while (cp < realmax)
- {
- FDEBUG(("--- at c%d ---\n", css - d->ssets));
- co = GETCOLOR(cm, *cp);
- FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
- ss = css->outs[co];
- if (ss == NULL)
- {
- ss = miss(v, d, css, co, cp + 1, start);
- if (ss == NULL)
- break; /* NOTE BREAK OUT */
- }
- cp++;
- ss->lastseen = cp;
- css = ss;
- if ((ss->flags & POSTSTATE) && cp >= realmin)
- break; /* NOTE BREAK OUT */
- }
- else
- while (cp < realmax)
- {
- co = GETCOLOR(cm, *cp);
- ss = css->outs[co];
- if (ss == NULL)
- {
- ss = miss(v, d, css, co, cp + 1, start);
- if (ss == NULL)
- break; /* NOTE BREAK OUT */
- }
- cp++;
- ss->lastseen = cp;
- css = ss;
- if ((ss->flags & POSTSTATE) && cp >= realmin)
- break; /* NOTE BREAK OUT */
- }
-
- if (ss == NULL)
- return NULL;
-
- if (coldp != NULL) /* report last no-progress state set, if
- * any */
- *coldp = lastcold(v, d);
-
- if ((ss->flags & POSTSTATE) && cp > min)
- {
- assert(cp >= realmin);
- cp--;
- }
- else if (cp == v->stop && max == v->stop)
- {
- co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
- FDEBUG(("color %ld\n", (long) co));
- ss = miss(v, d, css, co, cp, start);
- /* match might have ended at eol */
- if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
- *hitstopp = 1;
- }
-
- if (ss == NULL || !(ss->flags & POSTSTATE))
- return NULL;
-
- return cp;
-}
-
-/*
- * lastcold - determine last point at which no progress had been made
- */
-static chr * /* endpoint, or NULL */
-lastcold(struct vars * v,
- struct dfa * d)
-{
- struct sset *ss;
- chr *nopr;
- int i;
-
- nopr = d->lastnopr;
- if (nopr == NULL)
- nopr = v->start;
- for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
- if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
- nopr = ss->lastseen;
- return nopr;
-}
-
-/*
- * newdfa - set up a fresh DFA
- */
-static struct dfa *
-newdfa(struct vars * v,
- struct cnfa * cnfa,
- struct colormap * cm,
- struct smalldfa * small) /* preallocated space, may be NULL */
-{
- struct dfa *d;
- size_t nss = cnfa->nstates * 2;
- int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
- struct smalldfa *smallwas = small;
-
- assert(cnfa != NULL && cnfa->nstates != 0);
-
- if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
- {
- assert(wordsper == 1);
- if (small == NULL)
- {
- small = (struct smalldfa *) MALLOC(
- sizeof(struct smalldfa));
- if (small == NULL)
- {
- ERR(REG_ESPACE);
- return NULL;
- }
- }
- d = &small->dfa;
- d->ssets = small->ssets;
- d->statesarea = small->statesarea;
- d->work = &d->statesarea[nss];
- d->outsarea = small->outsarea;
- d->incarea = small->incarea;
- d->cptsmalloced = 0;
- d->mallocarea = (smallwas == NULL) ? (char *) small : NULL;
- }
- else
- {
- d = (struct dfa *) MALLOC(sizeof(struct dfa));
- if (d == NULL)
- {
- ERR(REG_ESPACE);
- return NULL;
- }
- d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
- d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
- sizeof(unsigned));
- d->work = &d->statesarea[nss * wordsper];
- d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
- sizeof(struct sset *));
- d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
- sizeof(struct arcp));
- d->cptsmalloced = 1;
- d->mallocarea = (char *) d;
- if (d->ssets == NULL || d->statesarea == NULL ||
- d->outsarea == NULL || d->incarea == NULL)
- {
- freedfa(d);
- ERR(REG_ESPACE);
- return NULL;
- }
- }
-
- d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
- d->nssused = 0;
- d->nstates = cnfa->nstates;
- d->ncolors = cnfa->ncolors;
- d->wordsper = wordsper;
- d->cnfa = cnfa;
- d->cm = cm;
- d->lastpost = NULL;
- d->lastnopr = NULL;
- d->search = d->ssets;
-
- /* initialization of sset fields is done as needed */
-
- return d;
-}
-
-/*
- * freedfa - free a DFA
- */
-static void
-freedfa(struct dfa * d)
-{
- if (d->cptsmalloced)
- {
- if (d->ssets != NULL)
- FREE(d->ssets);
- if (d->statesarea != NULL)
- FREE(d->statesarea);
- if (d->outsarea != NULL)
- FREE(d->outsarea);
- if (d->incarea != NULL)
- FREE(d->incarea);
- }
-
- if (d->mallocarea != NULL)
- FREE(d->mallocarea);
-}
-
-/*
- * hash - construct a hash code for a bitvector
- *
- * There are probably better ways, but they're more expensive.
- */
-static unsigned
-hash(unsigned *uv,
- int n)
-{
- int i;
- unsigned h;
-
- h = 0;
- for (i = 0; i < n; i++)
- h ^= uv[i];
- return h;
-}
-
-/*
- * initialize - hand-craft a cache entry for startup, otherwise get ready
- */
-static struct sset *
-initialize(struct vars * v, /* used only for debug flags */
- struct dfa * d,
- chr *start)
-{
- struct sset *ss;
- int i;
-
- /* is previous one still there? */
- if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
- ss = &d->ssets[0];
- else
- { /* no, must (re)build it */
- ss = getvacant(v, d, start, start);
- for (i = 0; i < d->wordsper; i++)
- ss->states[i] = 0;
- BSET(ss->states, d->cnfa->pre);
- ss->hash = HASH(ss->states, d->wordsper);
- assert(d->cnfa->pre != d->cnfa->post);
- ss->flags = STARTER | LOCKED | NOPROGRESS;
- /* lastseen dealt with below */
- }
-
- for (i = 0; i < d->nssused; i++)
- d->ssets[i].lastseen = NULL;
- ss->lastseen = start; /* maybe untrue, but harmless */
- d->lastpost = NULL;
- d->lastnopr = NULL;
- return ss;
-}
-
-/*
- * miss - handle a cache miss
- */
-static struct sset * /* NULL if goes to empty set */
-miss(struct vars * v, /* used only for debug flags */
- struct dfa * d,
- struct sset * css,
- pcolor co,
- chr *cp, /* next chr */
- chr *start) /* where the attempt got started */
-{
- struct cnfa *cnfa = d->cnfa;
- int i;
- unsigned h;
- struct carc *ca;
- struct sset *p;
- int ispost;
- int noprogress;
- int gotstate;
- int dolacons;
- int sawlacons;
-
- /* for convenience, we can be called even if it might not be a miss */
- if (css->outs[co] != NULL)
- {
- FDEBUG(("hit\n"));
- return css->outs[co];
- }
- FDEBUG(("miss\n"));
-
- /* first, what set of states would we end up in? */
- for (i = 0; i < d->wordsper; i++)
- d->work[i] = 0;
- ispost = 0;
- noprogress = 1;
- gotstate = 0;
- for (i = 0; i < d->nstates; i++)
- if (ISBSET(css->states, i))
- for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
- if (ca->co == co)
- {
- BSET(d->work, ca->to);
- gotstate = 1;
- if (ca->to == cnfa->post)
- ispost = 1;
- if (!cnfa->states[ca->to]->co)
- noprogress = 0;
- FDEBUG(("%d -> %d\n", i, ca->to));
- }
- dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0;
- sawlacons = 0;
- while (dolacons)
- { /* transitive closure */
- dolacons = 0;
- for (i = 0; i < d->nstates; i++)
- if (ISBSET(d->work, i))
- for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
- ca++)
- {
- if (ca->co <= cnfa->ncolors)
- continue; /* NOTE CONTINUE */
- sawlacons = 1;
- if (ISBSET(d->work, ca->to))
- continue; /* NOTE CONTINUE */
- if (!lacon(v, cnfa, cp, ca->co))
- continue; /* NOTE CONTINUE */
- BSET(d->work, ca->to);
- dolacons = 1;
- if (ca->to == cnfa->post)
- ispost = 1;
- if (!cnfa->states[ca->to]->co)
- noprogress = 0;
- FDEBUG(("%d :> %d\n", i, ca->to));
- }
- }
- if (!gotstate)
- return NULL;
- h = HASH(d->work, d->wordsper);
-
- /* next, is that in the cache? */
- for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
- if (HIT(h, d->work, p, d->wordsper))
- {
- FDEBUG(("cached c%d\n", p - d->ssets));
- break; /* NOTE BREAK OUT */
- }
- if (i == 0)
- { /* nope, need a new cache entry */
- p = getvacant(v, d, cp, start);
- assert(p != css);
- for (i = 0; i < d->wordsper; i++)
- p->states[i] = d->work[i];
- p->hash = h;
- p->flags = (ispost) ? POSTSTATE : 0;
- if (noprogress)
- p->flags |= NOPROGRESS;
- /* lastseen to be dealt with by caller */
- }
-
- if (!sawlacons)
- { /* lookahead conds. always cache miss */
- FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets));
- css->outs[co] = p;
- css->inchain[co] = p->ins;
- p->ins.ss = css;
- p->ins.co = (color) co;
- }
- return p;
-}
-
-/*
- * lacon - lookahead-constraint checker for miss()
- */
-static int /* predicate: constraint satisfied? */
-lacon(struct vars * v,
- struct cnfa * pcnfa, /* parent cnfa */
- chr *cp,
- pcolor co) /* "color" of the lookahead constraint */
-{
- int n;
- struct subre *sub;
- struct dfa *d;
- struct smalldfa sd;
- chr *end;
-
- n = co - pcnfa->ncolors;
- assert(n < v->g->nlacons && v->g->lacons != NULL);
- FDEBUG(("=== testing lacon %d\n", n));
- sub = &v->g->lacons[n];
- d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd);
- if (d == NULL)
- {
- ERR(REG_ESPACE);
- return 0;
- }
- end = longest(v, d, cp, v->stop, (int *) NULL);
- freedfa(d);
- FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
- return (sub->subno) ? (end != NULL) : (end == NULL);
-}
-
-/*
- * getvacant - get a vacant state set
- * This routine clears out the inarcs and outarcs, but does not otherwise
- * clear the innards of the state set -- that's up to the caller.
- */
-static struct sset *
-getvacant(struct vars * v, /* used only for debug flags */
- struct dfa * d,
- chr *cp,
- chr *start)
-{
- int i;
- struct sset *ss;
- struct sset *p;
- struct arcp ap;
- struct arcp lastap;
- color co;
-
- ss = pickss(v, d, cp, start);
- assert(!(ss->flags & LOCKED));
-
- /* clear out its inarcs, including self-referential ones */
- ap = ss->ins;
- while ((p = ap.ss) != NULL)
- {
- co = ap.co;
- FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long) co));
- p->outs[co] = NULL;
- ap = p->inchain[co];
- p->inchain[co].ss = NULL; /* paranoia */
- }
- ss->ins.ss = NULL;
-
- /* take it off the inarc chains of the ssets reached by its outarcs */
- for (i = 0; i < d->ncolors; i++)
- {
- p = ss->outs[i];
- assert(p != ss); /* not self-referential */
- if (p == NULL)
- continue; /* NOTE CONTINUE */
- FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
- if (p->ins.ss == ss && p->ins.co == i)
- p->ins = ss->inchain[i];
- else
- {
- assert(p->ins.ss != NULL);
- for (ap = p->ins; ap.ss != NULL &&
- !(ap.ss == ss && ap.co == i);
- ap = ap.ss->inchain[ap.co])
- lastap = ap;
- assert(ap.ss != NULL);
- lastap.ss->inchain[lastap.co] = ss->inchain[i];
- }
- ss->outs[i] = NULL;
- ss->inchain[i].ss = NULL;
- }
-
- /* if ss was a success state, may need to remember location */
- if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
- (d->lastpost == NULL || d->lastpost < ss->lastseen))
- d->lastpost = ss->lastseen;
-
- /* likewise for a no-progress state */
- if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
- (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
- d->lastnopr = ss->lastseen;
-
- return ss;
-}
-
-/*
- * pickss - pick the next stateset to be used
- */
-static struct sset *
-pickss(struct vars * v, /* used only for debug flags */
- struct dfa * d,
- chr *cp,
- chr *start)
-{
- int i;
- struct sset *ss;
- struct sset *end;
- chr *ancient;
-
- /* shortcut for cases where cache isn't full */
- if (d->nssused < d->nssets)
- {
- i = d->nssused;
- d->nssused++;
- ss = &d->ssets[i];
- FDEBUG(("new c%d\n", i));
- /* set up innards */
- ss->states = &d->statesarea[i * d->wordsper];
- ss->flags = 0;
- ss->ins.ss = NULL;
- ss->ins.co = WHITE; /* give it some value */
- ss->outs = &d->outsarea[i * d->ncolors];
- ss->inchain = &d->incarea[i * d->ncolors];
- for (i = 0; i < d->ncolors; i++)
- {
- ss->outs[i] = NULL;
- ss->inchain[i].ss = NULL;
- }
- return ss;
- }
-
- /* look for oldest, or old enough anyway */
- if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
- ancient = cp - d->nssets * 2 / 3;
- else
- ancient = start;
- for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
- if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
- !(ss->flags & LOCKED))
- {
- d->search = ss + 1;
- FDEBUG(("replacing c%d\n", ss - d->ssets));
- return ss;
- }
- for (ss = d->ssets, end = d->search; ss < end; ss++)
- if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
- !(ss->flags & LOCKED))
- {
- d->search = ss + 1;
- FDEBUG(("replacing c%d\n", ss - d->ssets));
- return ss;
- }
-
- /* nobody's old enough?!? -- something's really wrong */
- FDEBUG(("can't find victim to replace!\n"));
- assert(NOTREACHED);
- ERR(REG_ASSERT);
- return d->ssets;
-}
diff --git a/src/regex/regerror.c b/src/regex/regerror.c
index 4fd08556ba..b4c839adf5 100644
--- a/src/regex/regerror.c
+++ b/src/regex/regerror.c
@@ -1,126 +1,130 @@
+#if defined(__MWERKS__) && !defined(__MACH__)
+typedef long off_t ;
+#else
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include "regex.h"
+
+#include "utils.h"
+#include "regerror.ih"
+
/*
- * regerror - error-code expansion
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regerror.c,v 1.26 2003/08/04 00:43:21 momjian Exp $
- *
+ = #define REG_OKAY 0
+ = #define REG_NOMATCH 1
+ = #define REG_BADPAT 2
+ = #define REG_ECOLLATE 3
+ = #define REG_ECTYPE 4
+ = #define REG_EESCAPE 5
+ = #define REG_ESUBREG 6
+ = #define REG_EBRACK 7
+ = #define REG_EPAREN 8
+ = #define REG_EBRACE 9
+ = #define REG_BADBR 10
+ = #define REG_ERANGE 11
+ = #define REG_ESPACE 12
+ = #define REG_BADRPT 13
+ = #define REG_EMPTY 14
+ = #define REG_ASSERT 15
+ = #define REG_INVARG 16
+ = #define REG_ATOI 255 // convert name to number (!)
+ = #define REG_ITOA 0400 // convert number to name (!)
*/
-
-#include "regguts.h"
-
-/* unknown-error explanation */
-static char unk[] = "*** unknown regex error code 0x%x ***";
-
-/* struct to map among codes, code names, and explanations */
-static struct rerr
-{
+static struct rerr {
int code;
char *name;
char *explain;
-} rerrs[] =
-
-{
- /* the actual table is built from regex.h */
-#include "regerrs.h"
- {
- -1, "", "oops"
- }, /* explanation special-cased in code */
+} rerrs[] = {
+ { REG_OKAY, "REG_OKAY", "no errors detected" },
+ { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" },
+ { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+ { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+ { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+ { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+ { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+ { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+ { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+ { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+ { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+ { REG_ESPACE, "REG_ESPACE", "out of memory" },
+ { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+ { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+ { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+ { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+ { -1, "", "*** unknown regexp error code ***" },
};
-size_t
-regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */
- const regex_t *preg, /* associated regex_t (unused at present) */
- char *errbuf, /* result buffer (unless errbuf_size==0) */
- size_t errbuf_size) /* available space in errbuf, can be 0 */
- { return regerror(errcode, preg, errbuf, errbuf_size); }
/*
- * pg_regerror - the interface to error numbers
+ - regerror - the interface to error numbers
+ = extern size_t regerror(int, const regex_t *, char *, size_t);
*/
/* ARGSUSED */
-size_t /* actual space needed (including NUL) */
-wx_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */
- const regex_t *preg, /* associated regex_t (unused at present) */
- char *errbuf, /* result buffer (unless errbuf_size==0) */
- size_t errbuf_size) /* available space in errbuf, can be 0 */
+size_t
+regerror(errcode, preg, errbuf, errbuf_size)
+int errcode;
+const regex_t *preg;
+char *errbuf;
+size_t errbuf_size;
{
- struct rerr *r;
- char *msg;
- char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */
- size_t len;
- int icode;
+ register struct rerr *r;
+ register size_t len;
+ register int target = errcode &~ REG_ITOA;
+ register char *s;
+ char convbuf[50];
- switch (errcode)
- {
- case REG_ATOI: /* convert name to number */
+ if (errcode == REG_ATOI)
+ s = regatoi(preg, convbuf);
+ else {
for (r = rerrs; r->code >= 0; r++)
- if (strcmp(r->name, errbuf) == 0)
+ if (r->code == target)
break;
- sprintf(convbuf, "%d", r->code); /* -1 for unknown */
- msg = convbuf;
- break;
- case REG_ITOA: /* convert number to name */
- icode = atoi(errbuf); /* not our problem if this fails */
- for (r = rerrs; r->code >= 0; r++)
- if (r->code == icode)
- break;
+
+ if (errcode®_ITOA) {
if (r->code >= 0)
- msg = r->name;
+ (void) strcpy(convbuf, r->name);
else
- { /* unknown; tell him the number */
- sprintf(convbuf, "REG_%u", (unsigned) icode);
- msg = convbuf;
- }
- break;
- default: /* a real, normal error code */
- for (r = rerrs; r->code >= 0; r++)
- if (r->code == errcode)
- break;
- if (r->code >= 0)
- msg = r->explain;
- else
- { /* unknown; say so */
- sprintf(convbuf, unk, errcode);
- msg = convbuf;
- }
- break;
+ sprintf(convbuf, "REG_0x%x", target);
+ assert(strlen(convbuf) < sizeof(convbuf));
+ s = convbuf;
+ } else
+ s = r->explain;
}
- len = strlen(msg) + 1; /* space needed, including NUL */
- if (errbuf_size > 0)
- {
+ len = strlen(s) + 1;
+ if (errbuf_size > 0) {
if (errbuf_size > len)
- strcpy(errbuf, msg);
- else
- { /* truncate to fit */
- memcpy(errbuf, msg, errbuf_size - 1); /*RN - was strncpy*/
- errbuf[errbuf_size - 1] = '\0';
+ (void) strcpy(errbuf, s);
+ else {
+ (void) strncpy(errbuf, s, errbuf_size-1);
+ errbuf[errbuf_size-1] = '\0';
}
}
- return len;
+ return(len);
+}
+
+/*
+ - regatoi - internal routine to implement REG_ATOI
+ == static char *regatoi(const regex_t *preg, char *localbuf);
+ */
+static char *
+regatoi(preg, localbuf)
+const regex_t *preg;
+char *localbuf;
+{
+ register struct rerr *r;
+
+ for (r = rerrs; r->code >= 0; r++)
+ if (strcmp(r->name, preg->re_endp) == 0)
+ break;
+ if (r->code < 0)
+ return("0");
+
+ sprintf(localbuf, "%d", r->code);
+ return(localbuf);
}
diff --git a/src/regex/regerrs.h b/src/regex/regerrs.h
deleted file mode 100644
index f99dbf4f73..0000000000
--- a/src/regex/regerrs.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * $Id$
- */
-
-{
- REG_OKAY, "REG_OKAY", "no errors detected"
-},
-
-{
- REG_NOMATCH, "REG_NOMATCH", "failed to match"
-},
-
-{
- REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)"
-},
-
-{
- REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"
-},
-
-{
- REG_ECTYPE, "REG_ECTYPE", "invalid character class"
-},
-
-{
- REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence"
-},
-
-{
- REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"
-},
-
-{
- REG_EBRACK, "REG_EBRACK", "brackets [] not balanced"
-},
-
-{
- REG_EPAREN, "REG_EPAREN", "parentheses () not balanced"
-},
-
-{
- REG_EBRACE, "REG_EBRACE", "braces {} not balanced"
-},
-
-{
- REG_BADBR, "REG_BADBR", "invalid repetition count(s)"
-},
-
-{
- REG_ERANGE, "REG_ERANGE", "invalid character range"
-},
-
-{
- REG_ESPACE, "REG_ESPACE", "out of memory"
-},
-
-{
- REG_BADRPT, "REG_BADRPT", "quantifier operand invalid"
-},
-
-{
- REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"
-},
-
-{
- REG_INVARG, "REG_INVARG", "invalid argument to regex function"
-},
-
-{
- REG_MIXED, "REG_MIXED", "character widths of regex and string differ"
-},
-
-{
- REG_BADOPT, "REG_BADOPT", "invalid embedded option"
-},
diff --git a/src/regex/regex.7 b/src/regex/regex.7
new file mode 100644
index 0000000000..0fa180269e
--- /dev/null
+++ b/src/regex/regex.7
@@ -0,0 +1,235 @@
+.TH REGEX 7 "25 Oct 1995"
+.BY "Henry Spencer"
+.SH NAME
+regex \- POSIX 1003.2 regular expressions
+.SH DESCRIPTION
+Regular expressions (``RE''s),
+as defined in POSIX 1003.2, come in two forms:
+modern REs (roughly those of
+.IR egrep ;
+1003.2 calls these ``extended'' REs)
+and obsolete REs (roughly those of
+.IR ed ;
+1003.2 ``basic'' REs).
+Obsolete REs mostly exist for backward compatibility in some old programs;
+they will be discussed at the end.
+1003.2 leaves some aspects of RE syntax and semantics open;
+`\(dg' marks decisions on these aspects that
+may not be fully portable to other 1003.2 implementations.
+.PP
+A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR,
+separated by `|'.
+It matches anything that matches one of the branches.
+.PP
+A branch is one\(dg or more \fIpieces\fR, concatenated.
+It matches a match for the first, followed by a match for the second, etc.
+.PP
+A piece is an \fIatom\fR possibly followed
+by a single\(dg `*', `+', `?', or \fIbound\fR.
+An atom followed by `*' matches a sequence of 0 or more matches of the atom.
+An atom followed by `+' matches a sequence of 1 or more matches of the atom.
+An atom followed by `?' matches a sequence of 0 or 1 matches of the atom.
+.PP
+A \fIbound\fR is `{' followed by an unsigned decimal integer,
+possibly followed by `,'
+possibly followed by another unsigned decimal integer,
+always followed by `}'.
+The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive,
+and if there are two of them, the first may not exceed the second.
+An atom followed by a bound containing one integer \fIi\fR
+and no comma matches
+a sequence of exactly \fIi\fR matches of the atom.
+An atom followed by a bound
+containing one integer \fIi\fR and a comma matches
+a sequence of \fIi\fR or more matches of the atom.
+An atom followed by a bound
+containing two integers \fIi\fR and \fIj\fR matches
+a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
+.PP
+An atom is a regular expression enclosed in `()' (matching a match for the
+regular expression),
+an empty set of `()' (matching the null string)\(dg,
+a \fIbracket expression\fR (see below), `.'
+(matching any single character), `^' (matching the null string at the
+beginning of a line), `$' (matching the null string at the
+end of a line), a `\e' followed by one of the characters
+`^.[$()|*+?{\e'
+(matching that character taken as an ordinary character),
+a `\e' followed by any other character\(dg
+(matching that character taken as an ordinary character,
+as if the `\e' had not been present\(dg),
+or a single character with no other significance (matching that character).
+A `{' followed by a character other than a digit is an ordinary
+character, not the beginning of a bound\(dg.
+It is illegal to end an RE with `\e'.
+.PP
+A \fIbracket expression\fR is a list of characters enclosed in `[]'.
+It normally matches any single character from the list (but see below).
+If the list begins with `^',
+it matches any single character
+(but see below) \fInot\fR from the rest of the list.
+If two characters in the list are separated by `\-', this is shorthand
+for the full \fIrange\fR of characters between those two (inclusive) in the
+collating sequence,
+e.g. `[0\-9]' in ASCII matches any decimal digit.
+It is illegal\(dg for two ranges to share an
+endpoint, e.g. `a\-c\-e'.
+Ranges are very collating-sequence-dependent,
+and portable programs should avoid relying on them.
+.PP
+To include a literal `]' in the list, make it the first character
+(following a possible `^').
+To include a literal `\-', make it the first or last character,
+or the second endpoint of a range.
+To use a literal `\-' as the first endpoint of a range,
+enclose it in `[.' and `.]' to make it a collating element (see below).
+With the exception of these and some combinations using `[' (see next
+paragraphs), all other special characters, including `\e', lose their
+special significance within a bracket expression.
+.PP
+Within a bracket expression, a collating element (a character,
+a multi-character sequence that collates as if it were a single character,
+or a collating-sequence name for either)
+enclosed in `[.' and `.]' stands for the
+sequence of characters of that collating element.
+The sequence is a single element of the bracket expression's list.
+A bracket expression containing a multi-character collating element
+can thus match more than one character,
+e.g. if the collating sequence includes a `ch' collating element,
+then the RE `[[.ch.]]*c' matches the first five characters
+of `chchcc'.
+.PP
+Within a bracket expression, a collating element enclosed in `[=' and
+`=]' is an equivalence class, standing for the sequences of characters
+of all collating elements equivalent to that one, including itself.
+(If there are no other equivalent collating elements,
+the treatment is as if the enclosing delimiters were `[.' and `.]'.)
+For example, if o and \o'o^' are the members of an equivalence class,
+then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous.
+An equivalence class may not\(dg be an endpoint
+of a range.
+.PP
+Within a bracket expression, the name of a \fIcharacter class\fR enclosed
+in `[:' and `:]' stands for the list of all characters belonging to that
+class.
+Standard character class names are:
+.PP
+.RS
+.nf
+.ta 3c 6c 9c
+alnum digit punct
+alpha graph space
+blank lower upper
+cntrl print xdigit
+.fi
+.RE
+.PP
+These stand for the character classes defined in
+.IR ctype (3).
+A locale may provide others.
+A character class may not be used as an endpoint of a range.
+.PP
+There are two special cases\(dg of bracket expressions:
+the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at
+the beginning and end of a word respectively.
+A word is defined as a sequence of
+word characters
+which is neither preceded nor followed by
+word characters.
+A word character is an
+.I alnum
+character (as defined by
+.IR ctype (3))
+or an underscore.
+This is an extension,
+compatible with but not specified by POSIX 1003.2,
+and should be used with
+caution in software intended to be portable to other systems.
+.PP
+In the event that an RE could match more than one substring of a given
+string,
+the RE matches the one starting earliest in the string.
+If the RE could match more than one substring starting at that point,
+it matches the longest.
+Subexpressions also match the longest possible substrings, subject to
+the constraint that the whole match be as long as possible,
+with subexpressions starting earlier in the RE taking priority over
+ones starting later.
+Note that higher-level subexpressions thus take priority over
+their lower-level component subexpressions.
+.PP
+Match lengths are measured in characters, not collating elements.
+A null string is considered longer than no match at all.
+For example,
+`bb*' matches the three middle characters of `abbbc',
+`(wee|week)(knights|nights)' matches all ten characters of `weeknights',
+when `(.*).*' is matched against `abc' the parenthesized subexpression
+matches all three characters, and
+when `(a*)*' is matched against `bc' both the whole RE and the parenthesized
+subexpression match the null string.
+.PP
+If case-independent matching is specified,
+the effect is much as if all case distinctions had vanished from the
+alphabet.
+When an alphabetic that exists in multiple cases appears as an
+ordinary character outside a bracket expression, it is effectively
+transformed into a bracket expression containing both cases,
+e.g. `x' becomes `[xX]'.
+When it appears inside a bracket expression, all case counterparts
+of it are added to the bracket expression, so that (e.g.) `[x]'
+becomes `[xX]' and `[^x]' becomes `[^xX]'.
+.PP
+No particular limit is imposed on the length of REs\(dg.
+Programs intended to be portable should not employ REs longer
+than 256 bytes,
+as an implementation can refuse to accept such REs and remain
+POSIX-compliant.
+.PP
+Obsolete (``basic'') regular expressions differ in several respects.
+`|', `+', and `?' are ordinary characters and there is no equivalent
+for their functionality.
+The delimiters for bounds are `\e{' and `\e}',
+with `{' and `}' by themselves ordinary characters.
+The parentheses for nested subexpressions are `\e(' and `\e)',
+with `(' and `)' by themselves ordinary characters.
+`^' is an ordinary character except at the beginning of the
+RE or\(dg the beginning of a parenthesized subexpression,
+`$' is an ordinary character except at the end of the
+RE or\(dg the end of a parenthesized subexpression,
+and `*' is an ordinary character if it appears at the beginning of the
+RE or the beginning of a parenthesized subexpression
+(after a possible leading `^').
+Finally, there is one new type of atom, a \fIback reference\fR:
+`\e' followed by a non-zero decimal digit \fId\fR
+matches the same sequence of characters
+matched by the \fId\fRth parenthesized subexpression
+(numbering subexpressions by the positions of their opening parentheses,
+left to right),
+so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'.
+.SH SEE ALSO
+regex(3)
+.PP
+POSIX 1003.2, section 2.8 (Regular Expression Notation).
+.SH HISTORY
+Written by Henry Spencer, based on the 1003.2 spec.
+.SH BUGS
+Having two kinds of REs is a botch.
+.PP
+The current 1003.2 spec says that `)' is an ordinary character in
+the absence of an unmatched `(';
+this was an unintentional result of a wording error,
+and change is likely.
+Avoid relying on it.
+.PP
+Back references are a dreadful botch,
+posing major problems for efficient implementations.
+They are also somewhat vaguely defined
+(does
+`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?).
+Avoid using them.
+.PP
+1003.2's specification of case-independent matching is vague.
+The ``one case implies all cases'' definition given above
+is current consensus among implementors as to the right interpretation.
+.PP
+The syntax for word boundaries is incredibly ugly.
diff --git a/src/regex/regex.h b/src/regex/regex.h
index a1fcec2e1c..d094d072d5 100644
--- a/src/regex/regex.h
+++ b/src/regex/regex.h
@@ -1,202 +1,74 @@
#ifndef _REGEX_H_
#define _REGEX_H_ /* never again */
-/*
- * regular expressions
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Id$
- */
-
-/*
- * Add your own defines, if needed, here.
- */
+/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C" {
#endif
-#include
-#include
-#include
-
-#ifndef wxCHECK_GCC_VERSION
-#define wxCHECK_GCC_VERSION( major, minor ) \
- ( defined(__GNUC__) && defined(__GNUC_MINOR__) \
- && ( ( __GNUC__ > (major) ) \
- || ( __GNUC__ == (major) && __GNUC_MINOR__ >= (minor) ) ) )
-#endif
-
-#if !wxUSE_UNICODE
-# define wx_wchar char
-#else // Unicode
- #if (defined(__GNUC__) && !wxCHECK_GCC_VERSION(2, 96))
-# define wx_wchar __WCHAR_TYPE__
- #else // __WCHAR_TYPE__ and gcc < 2.96
- // standard case
-# define wx_wchar wchar_t
- #endif // __WCHAR_TYPE__
-#endif // ASCII/Unicode
-
-/*
- * interface types etc.
- */
-
-/*
- * regoff_t has to be large enough to hold either off_t or ssize_t,
- * and must be signed; it's only a guess that long is suitable.
- */
-typedef long regoff_t;
-
-/*
- * other interface types
- */
-
-/* the biggie, a compiled RE (or rather, a front end to same) */
-typedef struct
-{
- int re_magic; /* magic number */
- size_t re_nsub; /* number of subexpressions */
- long re_info; /* information about RE */
-#define REG_UBACKREF 000001
-#define REG_ULOOKAHEAD 000002
-#define REG_UBOUNDS 000004
-#define REG_UBRACES 000010
-#define REG_UBSALNUM 000020
-#define REG_UPBOTCH 000040
-#define REG_UBBS 000100
-#define REG_UNONPOSIX 000200
-#define REG_UUNSPEC 000400
-#define REG_UUNPORT 001000
-#define REG_ULOCALE 002000
-#define REG_UEMPTYMATCH 004000
-#define REG_UIMPOSSIBLE 010000
-#define REG_USHORTEST 020000
- int re_csize; /* sizeof(character) */
- char *re_endp; /* backward compatibility kludge */
- /* the rest is opaque pointers to hidden innards */
- char *re_guts; /* `char *' is more portable than `void *' */
- char *re_fns;
+/* === regex2.h === */
+typedef off_t regoff_t;
+typedef struct {
+ int re_magic;
+ size_t re_nsub; /* number of parenthesized subexpressions */
+ const char *re_endp; /* end pointer for REG_PEND */
+ struct re_guts *re_g; /* none of your business :-) */
} regex_t;
-
-/* result reporting (may acquire more fields later) */
-typedef struct
-{
- regoff_t rm_so; /* start of substring */
- regoff_t rm_eo; /* end of substring */
+typedef struct {
+ regoff_t rm_so; /* start of match */
+ regoff_t rm_eo; /* end of match */
} regmatch_t;
-/* supplementary control and reporting */
-typedef struct
-{
- regmatch_t rm_extend; /* see REG_EXPECT */
-} rm_detail_t;
+
+/* === regcomp.c === */
+extern int regcomp(regex_t *, const char *, int);
+#define REG_BASIC 0000
+#define REG_EXTENDED 0001
+#define REG_ICASE 0002
+#define REG_NOSUB 0004
+#define REG_NEWLINE 0010
+#define REG_NOSPEC 0020
+#define REG_PEND 0040
+#define REG_DUMP 0200
-
-/*
- * regex compilation flags
- */
-#define REG_BASIC 000000 /* BREs (convenience) */
-#define REG_EXTENDED 000001 /* EREs */
-#define REG_ADVF 000002 /* advanced features in EREs */
-#define REG_ADVANCED 000003 /* AREs (which are also EREs) */
-#define REG_QUOTE 000004 /* no special characters, none */
-#define REG_NOSPEC REG_QUOTE /* historical synonym */
-#define REG_ICASE 000010 /* ignore case */
-#define REG_NOSUB 000020 /* don't care about subexpressions */
-#define REG_EXPANDED 000040 /* expanded format, white space & comments */
-#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
-#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
-#define REG_NEWLINE 000300 /* newlines are line terminators */
-#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
-#define REG_EXPECT 001000 /* report details on partial/limited
- * matches */
-#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
-#define REG_DUMP 004000 /* none of your business :-) */
-#define REG_FAKE 010000 /* none of your business :-) */
-#define REG_PROGRESS 020000 /* none of your business :-) */
-
-
-
-/*
- * regex execution flags
- */
-#define REG_NOTBOL 0001 /* BOS is not BOL */
-#define REG_NOTEOL 0002 /* EOS is not EOL */
-#define REG_STARTEND 0004 /* backward compatibility kludge */
-#define REG_FTRACE 0010 /* none of your business */
-#define REG_MTRACE 0020 /* none of your business */
-#define REG_SMALL 0040 /* none of your business */
-
-
-/*
- * error reporting
- * Be careful if modifying the list of error codes -- the table used by
- * regerror() is generated automatically from this file!
- */
-#define REG_OKAY 0 /* no errors detected */
-#define REG_NOMATCH 1 /* failed to match */
-#define REG_BADPAT 2 /* invalid regexp */
-#define REG_ECOLLATE 3 /* invalid collating element */
-#define REG_ECTYPE 4 /* invalid character class */
-#define REG_EESCAPE 5 /* invalid escape \ sequence */
-#define REG_ESUBREG 6 /* invalid backreference number */
-#define REG_EBRACK 7 /* brackets [] not balanced */
-#define REG_EPAREN 8 /* parentheses () not balanced */
-#define REG_EBRACE 9 /* braces {} not balanced */
-#define REG_BADBR 10 /* invalid repetition count(s) */
-#define REG_ERANGE 11 /* invalid character range */
-#define REG_ESPACE 12 /* out of memory */
-#define REG_BADRPT 13 /* quantifier operand invalid */
-#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
-#define REG_INVARG 16 /* invalid argument to regex function */
-#define REG_MIXED 17 /* character widths of regex and string
- * differ */
-#define REG_BADOPT 18 /* invalid embedded option */
-/* two specials for debugging and testing */
-#define REG_ATOI 101 /* convert error-code name to number */
-#define REG_ITOA 102 /* convert error-code number to name */
-
-
-
-/*
- * the prototypes for exported functions
- */
-extern int wx_regcomp(regex_t *, const wx_wchar *, size_t, int);
-extern int regcomp(regex_t *, const wx_wchar *, int);
-extern int wx_regexec(regex_t *, const wx_wchar *, size_t, rm_detail_t *, size_t, regmatch_t[], int);
-extern int regexec(regex_t *, const wx_wchar *, size_t, regmatch_t[], int);
-extern void regfree(regex_t *);
+/* === regerror.c === */
+#define REG_OKAY 0
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+#define REG_EMPTY 14
+#define REG_ASSERT 15
+#define REG_INVARG 16
+#define REG_ATOI 255 /* convert name to number (!) */
+#define REG_ITOA 0400 /* convert number to name (!) */
extern size_t regerror(int, const regex_t *, char *, size_t);
-extern void wx_regfree(regex_t *);
-extern size_t wx_regerror(int, const regex_t *, char *, size_t);
+
+
+/* === regexec.c === */
+extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
+#define REG_NOTBOL 00001
+#define REG_NOTEOL 00002
+#define REG_STARTEND 00004
+#define REG_TRACE 00400 /* tracing of execution */
+#define REG_LARGE 01000 /* force large representation */
+#define REG_BACKR 02000 /* force use of backref code */
+
+
+/* === regfree.c === */
+extern void regfree(regex_t *);
#ifdef __cplusplus
}
#endif
-
-#endif /* _REGEX_H_ */
+/* ========= end header generated by ./mkh ========= */
+#endif
diff --git a/src/regex/regex2.h b/src/regex/regex2.h
new file mode 100644
index 0000000000..58fd8d8a43
--- /dev/null
+++ b/src/regex/regex2.h
@@ -0,0 +1,134 @@
+/*
+ * First, the stuff that ends up in the outside-world include file
+ = typedef off_t regoff_t;
+ = typedef struct {
+ = int re_magic;
+ = size_t re_nsub; // number of parenthesized subexpressions
+ = const char *re_endp; // end pointer for REG_PEND
+ = struct re_guts *re_g; // none of your business :-)
+ = } regex_t;
+ = typedef struct {
+ = regoff_t rm_so; // start of match
+ = regoff_t rm_eo; // end of match
+ = } regmatch_t;
+ */
+/*
+ * internals of regex_t
+ */
+#define MAGIC1 ((('r'^0200)<<8) | 'e')
+
+/*
+ * The internal representation is a *strip*, a sequence of
+ * operators ending with an endmarker. (Some terminology etc. is a
+ * historical relic of earlier versions which used multiple strips.)
+ * Certain oddities in the representation are there to permit running
+ * the machinery backwards; in particular, any deviation from sequential
+ * flow must be marked at both its source and its destination. Some
+ * fine points:
+ *
+ * - OPLUS_ and O_PLUS are *inside* the loop they create.
+ * - OQUEST_ and O_QUEST are *outside* the bypass they create.
+ * - OCH_ and O_CH are *outside* the multi-way branch they create, while
+ * OOR1 and OOR2 are respectively the end and the beginning of one of
+ * the branches. Note that there is an implicit OOR2 following OCH_
+ * and an implicit OOR1 preceding O_CH.
+ *
+ * In state representations, an operator's bit is on to signify a state
+ * immediately *preceding* "execution" of that operator.
+ */
+typedef long sop; /* strip operator */
+typedef long sopno;
+#define OPRMASK 0x7c000000
+#define OPDMASK 0x03ffffff
+#define OPSHIFT (26)
+#define OP(n) ((n)&OPRMASK)
+#define OPND(n) ((n)&OPDMASK)
+#define SOP(op, opnd) ((op)|(opnd))
+/* operators meaning operand */
+/* (back, fwd are offsets) */
+#define OEND (1< uch [csetsize] */
+ uch mask; /* bit within array */
+ uch hash; /* hash code */
+ size_t smultis;
+ char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
+} cset;
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
+#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
+
+/* stuff for character categories */
+typedef unsigned char cat_t;
+
+/*
+ * main compiled-expression structure
+ */
+struct re_guts {
+ int magic;
+# define MAGIC2 ((('R'^0200)<<8)|'E')
+ sop *strip; /* malloced area for strip */
+ int csetsize; /* number of bits in a cset vector */
+ int ncsets; /* number of csets in use */
+ cset *sets; /* -> cset [ncsets] */
+ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
+ int cflags; /* copy of regcomp() cflags argument */
+ sopno nstates; /* = number of sops */
+ sopno firststate; /* the initial OEND (normally 0) */
+ sopno laststate; /* the final OEND */
+ int iflags; /* internal flags */
+# define USEBOL 01 /* used ^ */
+# define USEEOL 02 /* used $ */
+# define BAD 04 /* something wrong */
+ int nbol; /* number of ^ used */
+ int neol; /* number of $ used */
+ int ncategories; /* how many character categories */
+ cat_t *categories; /* ->catspace[-CHAR_MIN] */
+ char *must; /* match must contain this string */
+ int mlen; /* length of must */
+ size_t nsub; /* copy of re_nsub */
+ int backrefs; /* does it use back references? */
+ sopno nplus; /* how deep does it nest +s? */
+ /* catspace must be last */
+ cat_t catspace[1]; /* actually [NC] */
+};
+
+/* misc utilities */
+#define OUT (CHAR_MAX+1) /* a non-character value */
+#define ISWORD(c) (isalnum(c) || (c) == '_')
diff --git a/src/regex/regexec.c b/src/regex/regexec.c
index c3a29093f5..d64bc28fcb 100644
--- a/src/regex/regexec.c
+++ b/src/regex/regexec.c
@@ -1,1089 +1,144 @@
/*
- * re_*exec and friends - match REs
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $
+ * the outer shell of regexec()
*
+ * This file includes engine.c *twice*, after muchos fiddling with the
+ * macros that code uses. This lets the same code operate on two different
+ * representations for state sets.
*/
+#if defined(__MWERKS__) && !defined(__MACH__)
+typedef long off_t ;
+#else
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include "regex.h"
-#include "regguts.h"
+#include "utils.h"
+#include "regex2.h"
+#ifdef REDEBUG
+static int nope = 0; /* for use in asserts; shuts lint up */
+#endif
+/* macros for manipulating states, small version */
+#define states unsigned
+#define states1 unsigned /* for later use in regexec() decision */
+#define CLEAR(v) ((v) = 0)
+#define SET0(v, n) ((v) &= ~((unsigned)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned)1 << (n))
+#define ISSET(v, n) ((v) & ((unsigned)1 << (n)))
+#define ASSIGN(d, s) ((d) = (s))
+#define EQ(a, b) ((a) == (b))
+#define STATEVARS int dummy /* dummy version */
+#define STATESETUP(m, n) /* nothing */
+#define STATETEARDOWN(m) /* nothing */
+#define SETUP(v) ((v) = 0)
+#define onestate unsigned
+#define INIT(o, n) ((o) = (unsigned)1 << (n))
+#define INC(o) ((o) <<= 1)
+#define ISSTATEIN(v, o) ((v) & (o))
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n))
+#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n))
+#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n)))
+/* function names */
+#define SNAMES /* engine.c looks after details */
-/* lazy-DFA representation */
-struct arcp
-{ /* "pointer" to an outarc */
- struct sset *ss;
- color co;
-};
+#include "engine.c"
-struct sset
-{ /* state set */
- unsigned *states; /* pointer to bitvector */
- unsigned hash; /* hash of bitvector */
-#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
-#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
- memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
- int flags;
-#define STARTER 01 /* the initial state set */
-#define POSTSTATE 02 /* includes the goal state */
-#define LOCKED 04 /* locked in cache */
-#define NOPROGRESS 010 /* zero-progress state set */
- struct arcp ins; /* chain of inarcs pointing here */
- chr *lastseen; /* last entered on arrival here */
- struct sset **outs; /* outarc vector indexed by color */
- struct arcp *inchain; /* chain-pointer vector for outarcs */
-};
-
-struct dfa
-{
- int nssets; /* size of cache */
- int nssused; /* how many entries occupied yet */
- int nstates; /* number of states */
- int ncolors; /* length of outarc and inchain vectors */
- int wordsper; /* length of state-set bitvectors */
- struct sset *ssets; /* state-set cache */
- unsigned *statesarea; /* bitvector storage */
- unsigned *work; /* pointer to work area within statesarea */
- struct sset **outsarea; /* outarc-vector storage */
- struct arcp *incarea; /* inchain storage */
- struct cnfa *cnfa;
- struct colormap *cm;
- chr *lastpost; /* location of last cache-flushed success */
- chr *lastnopr; /* location of last cache-flushed
- * NOPROGRESS */
- struct sset *search; /* replacement-search-pointer memory */
- int cptsmalloced; /* were the areas individually malloced? */
- char *mallocarea; /* self, or master malloced area, or NULL */
-};
-
-#define WORK 1 /* number of work bitvectors needed */
-
-/* setup for non-malloc allocation for small cases */
-#define FEWSTATES 20 /* must be less than UBITS */
-#define FEWCOLORS 15
-struct smalldfa
-{
- struct dfa dfa;
- struct sset ssets[FEWSTATES * 2];
- unsigned statesarea[FEWSTATES * 2 + WORK];
- struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
- struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
-};
-
-#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
-
-
-
-/* internal variables, bundled for easy passing around */
-struct vars
-{
- regex_t *re;
- struct guts *g;
- int eflags; /* copies of arguments */
- size_t nmatch;
- regmatch_t *pmatch;
- rm_detail_t *details;
- chr *start; /* start of string */
- chr *stop; /* just past end of string */
- int err; /* error code if any (0 none) */
- regoff_t *mem; /* memory vector for backtracking */
- struct smalldfa dfa1;
- struct smalldfa dfa2;
-};
-
-#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
-#define ISERR() VISERR(v)
-#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
-#define ERR(e) VERR(v, e) /* record an error */
-#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return
- * it */
-#define OFF(p) ((p) - v->start)
-#define LOFF(p) ((long)OFF(p))
+/* now undo things */
+#undef states
+#undef CLEAR
+#undef SET0
+#undef SET1
+#undef ISSET
+#undef ASSIGN
+#undef EQ
+#undef STATEVARS
+#undef STATESETUP
+#undef STATETEARDOWN
+#undef SETUP
+#undef onestate
+#undef INIT
+#undef INC
+#undef ISSTATEIN
+#undef FWD
+#undef BACK
+#undef ISSETBACK
+#undef SNAMES
+/* macros for manipulating states, large version */
+#define states char *
+#define CLEAR(v) memset(v, 0, m->g->nstates)
+#define SET0(v, n) ((v)[n] = 0)
+#define SET1(v, n) ((v)[n] = 1)
+#define ISSET(v, n) ((v)[n])
+#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
+#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
+#define STATEVARS int vn; char *space
+#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
+ if ((m)->space == NULL) return(REG_ESPACE); \
+ (m)->vn = 0; }
+#define STATETEARDOWN(m) { free((m)->space); }
+#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
+#define onestate int
+#define INIT(o, n) ((o) = (n))
+#define INC(o) ((o)++)
+#define ISSTATEIN(v, o) ((v)[o])
+/* some abbreviations; note that some of these know variable names! */
+/* do "if I'm here, I can also be there" etc without branches */
+#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
+#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
+#define ISSETBACK(v, n) ((v)[here - (n)])
+/* function names */
+#define LNAMES /* flag */
+#include "engine.c"
/*
- * forward declarations
+ - regexec - interface for matching
+ = extern int regexec(const regex_t *, const char *, size_t, \
+ = regmatch_t [], int);
+ = #define REG_NOTBOL 00001
+ = #define REG_NOTEOL 00002
+ = #define REG_STARTEND 00004
+ = #define REG_TRACE 00400 // tracing of execution
+ = #define REG_LARGE 01000 // force large representation
+ = #define REG_BACKR 02000 // force use of backref code
+ *
+ * We put this here so we can exploit knowledge of the state representation
+ * when choosing which matcher to call. Also, by this point the matchers
+ * have been prototyped.
*/
-/* === regexec.c === */
-static int find(struct vars *, struct cnfa *, struct colormap *);
-static int cfind(struct vars *, struct cnfa *, struct colormap *);
-static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
-static void zapsubs(regmatch_t *, size_t);
-static void zapmem(struct vars *, struct subre *);
-static void subset(struct vars *, struct subre *, chr *, chr *);
-static int dissect(struct vars *, struct subre *, chr *, chr *);
-static int condissect(struct vars *, struct subre *, chr *, chr *);
-static int altdissect(struct vars *, struct subre *, chr *, chr *);
-static int cdissect(struct vars *, struct subre *, chr *, chr *);
-static int ccondissect(struct vars *, struct subre *, chr *, chr *);
-static int crevdissect(struct vars *, struct subre *, chr *, chr *);
-static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
-static int caltdissect(struct vars *, struct subre *, chr *, chr *);
-
-/* === rege_dfa.c === */
-static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
-static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
-static chr *lastcold(struct vars *, struct dfa *);
-static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
-static void freedfa(struct dfa *);
-static unsigned hash(unsigned *, int);
-static struct sset *initialize(struct vars *, struct dfa *, chr *);
-static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *);
-static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
-static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
-static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
-
-
-/*
- * regexec - match regular expression
- */
-int
-regexec(regex_t *re,
- const chr *string,
- size_t nmatch,
- regmatch_t pmatch[],
- int flags)
+int /* 0 success, REG_NOMATCH failure */
+regexec(preg, string, nmatch, pmatch, eflags)
+const regex_t *preg;
+const char *string;
+size_t nmatch;
+regmatch_t pmatch[];
+int eflags;
{
- rm_detail_t det;
- size_t nLen = 0;
- chr* s2 = (chr*) string;
+ register struct re_guts *g = preg->re_g;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
+#endif
- if (string && *string)
- {
- while(*++s2);
- }
+ if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
+ return(REG_BADPAT);
+ assert(!(g->iflags&BAD));
+ if (g->iflags&BAD) /* backstop for no-debug case */
+ return(REG_BADPAT);
+ eflags = GOODFLAGS(eflags);
- nLen = ((s2 - string) / sizeof(chr));
-
- return wx_regexec(re, string, nLen, &det, nmatch, pmatch, flags);
-}
-int
-wx_regexec(regex_t *re,
- const chr *string,
- size_t len,
- rm_detail_t *details,
- size_t nmatch,
- regmatch_t pmatch[],
- int flags)
-{
- struct vars var;
- register struct vars *v = &var;
- int st;
- size_t n;
- int backref;
-
-#define LOCALMAT 20
- regmatch_t mat[LOCALMAT];
-
-#define LOCALMEM 40
- regoff_t mem[LOCALMEM];
-
- /* sanity checks */
- if (re == NULL || string == NULL || re->re_magic != REMAGIC)
- return REG_INVARG;
- if (re->re_csize != sizeof(chr))
- return REG_MIXED;
-
- /* setup */
- v->re = re;
- v->g = (struct guts *) re->re_guts;
- if ((v->g->cflags & REG_EXPECT) && details == NULL)
- return REG_INVARG;
- if (v->g->info & REG_UIMPOSSIBLE)
- return REG_NOMATCH;
- backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
- v->eflags = flags;
- if (v->g->cflags & REG_NOSUB)
- nmatch = 0; /* override client */
- v->nmatch = nmatch;
- if (backref)
- {
- /* need work area */
- if (v->g->nsub + 1 <= LOCALMAT)
- v->pmatch = mat;
- else
- v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
- sizeof(regmatch_t));
- if (v->pmatch == NULL)
- return REG_ESPACE;
- v->nmatch = v->g->nsub + 1;
- }
+ if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE))
+ return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
else
- v->pmatch = pmatch;
- v->details = details;
- v->start = (chr *) string;
- v->stop = (chr *) string + len;
- v->err = 0;
- if (backref)
- {
- /* need retry memory */
- assert(v->g->ntree >= 0);
- n = (size_t) v->g->ntree;
- if (n <= LOCALMEM)
- v->mem = mem;
- else
- v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t));
- if (v->mem == NULL)
- {
- if (v->pmatch != pmatch && v->pmatch != mat)
- FREE(v->pmatch);
- return REG_ESPACE;
- }
- }
- else
- v->mem = NULL;
-
- /* do it */
- assert(v->g->tree != NULL);
- if (backref)
- st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
- else
- st = find(v, &v->g->tree->cnfa, &v->g->cmap);
-
- /* copy (portion of) match vector over if necessary */
- if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
- {
- zapsubs(pmatch, nmatch);
- n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
- memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
- }
-
- /* clean up */
- if (v->pmatch != pmatch && v->pmatch != mat)
- FREE(v->pmatch);
- if (v->mem != NULL && v->mem != mem)
- FREE(v->mem);
- return st;
+ return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
}
-
-/*
- * find - find a match for the main NFA (no-complications case)
- */
-static int
-find(struct vars * v,
- struct cnfa * cnfa,
- struct colormap * cm)
-{
- struct dfa *s;
- struct dfa *d;
- chr *begin;
- chr *end = NULL;
- chr *cold;
- chr *open; /* open and close of range of possible
- * starts */
- chr *close;
- int hitend;
- int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
-
- /* first, a shot with the search RE */
- s = newdfa(v, &v->g->search, cm, &v->dfa1);
- assert(!(ISERR() && s != NULL));
- NOERR();
- MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
- cold = NULL;
- close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL);
- freedfa(s);
- NOERR();
- if (v->g->cflags & REG_EXPECT)
- {
- assert(v->details != NULL);
- if (cold != NULL)
- v->details->rm_extend.rm_so = OFF(cold);
- else
- v->details->rm_extend.rm_so = OFF(v->stop);
- v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
- }
- if (close == NULL) /* not found */
- return REG_NOMATCH;
- if (v->nmatch == 0) /* found, don't need exact location */
- return REG_OKAY;
-
- /* find starting point and match */
- assert(cold != NULL);
- open = cold;
- cold = NULL;
- MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
- d = newdfa(v, cnfa, cm, &v->dfa1);
- assert(!(ISERR() && d != NULL));
- NOERR();
- for (begin = open; begin <= close; begin++)
- {
- MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
- if (shorter)
- end = shortest(v, d, begin, begin, v->stop,
- (chr **) NULL, &hitend);
- else
- end = longest(v, d, begin, v->stop, &hitend);
- NOERR();
- if (hitend && cold == NULL)
- cold = begin;
- if (end != NULL)
- break; /* NOTE BREAK OUT */
- }
- assert(end != NULL); /* search RE succeeded so loop should */
- freedfa(d);
-
- /* and pin down details */
- assert(v->nmatch > 0);
- v->pmatch[0].rm_so = OFF(begin);
- v->pmatch[0].rm_eo = OFF(end);
- if (v->g->cflags & REG_EXPECT)
- {
- if (cold != NULL)
- v->details->rm_extend.rm_so = OFF(cold);
- else
- v->details->rm_extend.rm_so = OFF(v->stop);
- v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
- }
- if (v->nmatch == 1) /* no need for submatches */
- return REG_OKAY;
-
- /* submatches */
- zapsubs(v->pmatch, v->nmatch);
- return dissect(v, v->g->tree, begin, end);
-}
-
-/*
- * cfind - find a match for the main NFA (with complications)
- */
-static int
-cfind(struct vars * v,
- struct cnfa * cnfa,
- struct colormap * cm)
-{
- struct dfa *s;
- struct dfa *d;
- chr *cold;
- int ret;
-
- s = newdfa(v, &v->g->search, cm, &v->dfa1);
- NOERR();
- d = newdfa(v, cnfa, cm, &v->dfa2);
- if (ISERR())
- {
- assert(d == NULL);
- freedfa(s);
- return v->err;
- }
-
- ret = cfindloop(v, cnfa, cm, d, s, &cold);
-
- freedfa(d);
- freedfa(s);
- NOERR();
- if (v->g->cflags & REG_EXPECT)
- {
- assert(v->details != NULL);
- if (cold != NULL)
- v->details->rm_extend.rm_so = OFF(cold);
- else
- v->details->rm_extend.rm_so = OFF(v->stop);
- v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
- }
- return ret;
-}
-
-/*
- * cfindloop - the heart of cfind
- */
-static int
-cfindloop(struct vars * v,
- struct cnfa * cnfa,
- struct colormap * cm,
- struct dfa * d,
- struct dfa * s,
- chr **coldp) /* where to put coldstart pointer */
-{
- chr *begin;
- chr *end;
- chr *cold;
- chr *open; /* open and close of range of possible
- * starts */
- chr *close;
- chr *estart;
- chr *estop;
- int er;
- int shorter = v->g->tree->flags & SHORTER;
- int hitend;
-
- assert(d != NULL && s != NULL);
- cold = NULL;
- close = v->start;
- do
- {
- MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
- close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
- if (close == NULL)
- break; /* NOTE BREAK */
- assert(cold != NULL);
- open = cold;
- cold = NULL;
- MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
- for (begin = open; begin <= close; begin++)
- {
- MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
- estart = begin;
- estop = v->stop;
- for (;;)
- {
- if (shorter)
- end = shortest(v, d, begin, estart,
- estop, (chr **) NULL, &hitend);
- else
- end = longest(v, d, begin, estop,
- &hitend);
- if (hitend && cold == NULL)
- cold = begin;
- if (end == NULL)
- break; /* NOTE BREAK OUT */
- MDEBUG(("tentative end %ld\n", LOFF(end)));
- zapsubs(v->pmatch, v->nmatch);
- zapmem(v, v->g->tree);
- er = cdissect(v, v->g->tree, begin, end);
- if (er == REG_OKAY)
- {
- if (v->nmatch > 0)
- {
- v->pmatch[0].rm_so = OFF(begin);
- v->pmatch[0].rm_eo = OFF(end);
- }
- *coldp = cold;
- return REG_OKAY;
- }
- if (er != REG_NOMATCH)
- {
- ERR(er);
- return er;
- }
- if ((shorter) ? end == estop : end == begin)
- {
- /* no point in trying again */
- *coldp = cold;
- return REG_NOMATCH;
- }
- /* go around and try again */
- if (shorter)
- estart = end + 1;
- else
- estop = end - 1;
- }
- }
- } while (close < v->stop);
-
- *coldp = cold;
- return REG_NOMATCH;
-}
-
-/*
- * zapsubs - initialize the subexpression matches to "no match"
- */
-static void
-zapsubs(regmatch_t *p,
- size_t n)
-{
- size_t i;
-
- for (i = n - 1; i > 0; i--)
- {
- p[i].rm_so = -1;
- p[i].rm_eo = -1;
- }
-}
-
-/*
- * zapmem - initialize the retry memory of a subtree to zeros
- */
-static void
-zapmem(struct vars * v,
- struct subre * t)
-{
- if (t == NULL)
- return;
-
- assert(v->mem != NULL);
- v->mem[t->retry] = 0;
- if (t->op == '(')
- {
- assert(t->subno > 0);
- v->pmatch[t->subno].rm_so = -1;
- v->pmatch[t->subno].rm_eo = -1;
- }
-
- if (t->left != NULL)
- zapmem(v, t->left);
- if (t->right != NULL)
- zapmem(v, t->right);
-}
-
-/*
- * subset - set any subexpression relevant to a successful subre
- */
-static void
-subset(struct vars * v,
- struct subre * sub,
- chr *begin,
- chr *end)
-{
- int n = sub->subno;
-
- assert(n > 0);
- if ((size_t) n >= v->nmatch)
- return;
-
- MDEBUG(("setting %d\n", n));
- v->pmatch[n].rm_so = OFF(begin);
- v->pmatch[n].rm_eo = OFF(end);
-}
-
-/*
- * dissect - determine subexpression matches (uncomplicated case)
- */
-static int /* regexec return code */
-dissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- assert(t != NULL);
- MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
-
- switch (t->op)
- {
- case '=': /* terminal node */
- assert(t->left == NULL && t->right == NULL);
- return REG_OKAY; /* no action, parent did the work */
- break;
- case '|': /* alternation */
- assert(t->left != NULL);
- return altdissect(v, t, begin, end);
- break;
- case 'b': /* back ref -- shouldn't be calling us! */
- return REG_ASSERT;
- break;
- case '.': /* concatenation */
- assert(t->left != NULL && t->right != NULL);
- return condissect(v, t, begin, end);
- break;
- case '(': /* capturing */
- assert(t->left != NULL && t->right == NULL);
- assert(t->subno > 0);
- subset(v, t, begin, end);
- return dissect(v, t->left, begin, end);
- break;
- default:
- return REG_ASSERT;
- break;
- }
-}
-
-/*
- * condissect - determine concatenation subexpression matches (uncomplicated)
- */
-static int /* regexec return code */
-condissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- struct dfa *d;
- struct dfa *d2;
- chr *mid;
- int i;
- int shorter = (t->left->flags & SHORTER) ? 1 : 0;
- chr *stop = (shorter) ? end : begin;
-
- assert(t->op == '.');
- assert(t->left != NULL && t->left->cnfa.nstates > 0);
- assert(t->right != NULL && t->right->cnfa.nstates > 0);
-
- d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
- NOERR();
- d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
- if (ISERR())
- {
- assert(d2 == NULL);
- freedfa(d);
- return v->err;
- }
-
- /* pick a tentative midpoint */
- if (shorter)
- mid = shortest(v, d, begin, begin, end, (chr **) NULL,
- (int *) NULL);
- else
- mid = longest(v, d, begin, end, (int *) NULL);
- if (mid == NULL)
- {
- freedfa(d);
- freedfa(d2);
- return REG_ASSERT;
- }
- MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
-
- /* iterate until satisfaction or failure */
- while (longest(v, d2, mid, end, (int *) NULL) != end)
- {
- /* that midpoint didn't work, find a new one */
- if (mid == stop)
- {
- /* all possibilities exhausted! */
- MDEBUG(("no midpoint!\n"));
- freedfa(d);
- freedfa(d2);
- return REG_ASSERT;
- }
- if (shorter)
- mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL,
- (int *) NULL);
- else
- mid = longest(v, d, begin, mid - 1, (int *) NULL);
- if (mid == NULL)
- {
- /* failed to find a new one! */
- MDEBUG(("failed midpoint!\n"));
- freedfa(d);
- freedfa(d2);
- return REG_ASSERT;
- }
- MDEBUG(("new midpoint %ld\n", LOFF(mid)));
- }
-
- /* satisfaction */
- MDEBUG(("successful\n"));
- freedfa(d);
- freedfa(d2);
- i = dissect(v, t->left, begin, mid);
- if (i != REG_OKAY)
- return i;
- return dissect(v, t->right, mid, end);
-}
-
-/*
- * altdissect - determine alternative subexpression matches (uncomplicated)
- */
-static int /* regexec return code */
-altdissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- struct dfa *d;
- int i;
-
- assert(t != NULL);
- assert(t->op == '|');
-
- for (i = 0; t != NULL; t = t->right, i++)
- {
- MDEBUG(("trying %dth\n", i));
- assert(t->left != NULL && t->left->cnfa.nstates > 0);
- d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
- if (ISERR())
- return v->err;
- if (longest(v, d, begin, end, (int *) NULL) == end)
- {
- MDEBUG(("success\n"));
- freedfa(d);
- return dissect(v, t->left, begin, end);
- }
- freedfa(d);
- }
- return REG_ASSERT; /* none of them matched?!? */
-}
-
-/*
- * cdissect - determine subexpression matches (with complications)
- * The retry memory stores the offset of the trial midpoint from begin,
- * plus 1 so that 0 uniquely means "clean slate".
- */
-static int /* regexec return code */
-cdissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- int er;
-
- assert(t != NULL);
- MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
-
- switch (t->op)
- {
- case '=': /* terminal node */
- assert(t->left == NULL && t->right == NULL);
- return REG_OKAY; /* no action, parent did the work */
- break;
- case '|': /* alternation */
- assert(t->left != NULL);
- return caltdissect(v, t, begin, end);
- break;
- case 'b': /* back ref -- shouldn't be calling us! */
- assert(t->left == NULL && t->right == NULL);
- return cbrdissect(v, t, begin, end);
- break;
- case '.': /* concatenation */
- assert(t->left != NULL && t->right != NULL);
- return ccondissect(v, t, begin, end);
- break;
- case '(': /* capturing */
- assert(t->left != NULL && t->right == NULL);
- assert(t->subno > 0);
- er = cdissect(v, t->left, begin, end);
- if (er == REG_OKAY)
- subset(v, t, begin, end);
- return er;
- break;
- default:
- return REG_ASSERT;
- break;
- }
-}
-
-/*
- * ccondissect - concatenation subexpression matches (with complications)
- * The retry memory stores the offset of the trial midpoint from begin,
- * plus 1 so that 0 uniquely means "clean slate".
- */
-static int /* regexec return code */
-ccondissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- struct dfa *d;
- struct dfa *d2;
- chr *mid;
- int er;
-
- assert(t->op == '.');
- assert(t->left != NULL && t->left->cnfa.nstates > 0);
- assert(t->right != NULL && t->right->cnfa.nstates > 0);
-
- if (t->left->flags & SHORTER) /* reverse scan */
- return crevdissect(v, t, begin, end);
-
- d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
- if (ISERR())
- return v->err;
- d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
- if (ISERR())
- {
- freedfa(d);
- return v->err;
- }
- MDEBUG(("cconcat %d\n", t->retry));
-
- /* pick a tentative midpoint */
- if (v->mem[t->retry] == 0)
- {
- mid = longest(v, d, begin, end, (int *) NULL);
- if (mid == NULL)
- {
- freedfa(d);
- freedfa(d2);
- return REG_NOMATCH;
- }
- MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
- v->mem[t->retry] = (mid - begin) + 1;
- }
- else
- {
- mid = begin + (v->mem[t->retry] - 1);
- MDEBUG(("working midpoint %ld\n", LOFF(mid)));
- }
-
- /* iterate until satisfaction or failure */
- for (;;)
- {
- /* try this midpoint on for size */
- er = cdissect(v, t->left, begin, mid);
- if (er == REG_OKAY &&
- longest(v, d2, mid, end, (int *) NULL) == end &&
- (er = cdissect(v, t->right, mid, end)) ==
- REG_OKAY)
- break; /* NOTE BREAK OUT */
- if (er != REG_OKAY && er != REG_NOMATCH)
- {
- freedfa(d);
- freedfa(d2);
- return er;
- }
-
- /* that midpoint didn't work, find a new one */
- if (mid == begin)
- {
- /* all possibilities exhausted */
- MDEBUG(("%d no midpoint\n", t->retry));
- freedfa(d);
- freedfa(d2);
- return REG_NOMATCH;
- }
- mid = longest(v, d, begin, mid - 1, (int *) NULL);
- if (mid == NULL)
- {
- /* failed to find a new one */
- MDEBUG(("%d failed midpoint\n", t->retry));
- freedfa(d);
- freedfa(d2);
- return REG_NOMATCH;
- }
- MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
- v->mem[t->retry] = (mid - begin) + 1;
- zapmem(v, t->left);
- zapmem(v, t->right);
- }
-
- /* satisfaction */
- MDEBUG(("successful\n"));
- freedfa(d);
- freedfa(d2);
- return REG_OKAY;
-}
-
-/*
- * crevdissect - determine backref shortest-first subexpression matches
- * The retry memory stores the offset of the trial midpoint from begin,
- * plus 1 so that 0 uniquely means "clean slate".
- */
-static int /* regexec return code */
-crevdissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- struct dfa *d;
- struct dfa *d2;
- chr *mid;
- int er;
-
- assert(t->op == '.');
- assert(t->left != NULL && t->left->cnfa.nstates > 0);
- assert(t->right != NULL && t->right->cnfa.nstates > 0);
- assert(t->left->flags & SHORTER);
-
- /* concatenation -- need to split the substring between parts */
- d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
- if (ISERR())
- return v->err;
- d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
- if (ISERR())
- {
- freedfa(d);
- return v->err;
- }
- MDEBUG(("crev %d\n", t->retry));
-
- /* pick a tentative midpoint */
- if (v->mem[t->retry] == 0)
- {
- mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
- if (mid == NULL)
- {
- freedfa(d);
- freedfa(d2);
- return REG_NOMATCH;
- }
- MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
- v->mem[t->retry] = (mid - begin) + 1;
- }
- else
- {
- mid = begin + (v->mem[t->retry] - 1);
- MDEBUG(("working midpoint %ld\n", LOFF(mid)));
- }
-
- /* iterate until satisfaction or failure */
- for (;;)
- {
- /* try this midpoint on for size */
- er = cdissect(v, t->left, begin, mid);
- if (er == REG_OKAY &&
- longest(v, d2, mid, end, (int *) NULL) == end &&
- (er = cdissect(v, t->right, mid, end)) ==
- REG_OKAY)
- break; /* NOTE BREAK OUT */
- if (er != REG_OKAY && er != REG_NOMATCH)
- {
- freedfa(d);
- freedfa(d2);
- return er;
- }
-
- /* that midpoint didn't work, find a new one */
- if (mid == end)
- {
- /* all possibilities exhausted */
- MDEBUG(("%d no midpoint\n", t->retry));
- freedfa(d);
- freedfa(d2);
- return REG_NOMATCH;
- }
- mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
- if (mid == NULL)
- {
- /* failed to find a new one */
- MDEBUG(("%d failed midpoint\n", t->retry));
- freedfa(d);
- freedfa(d2);
- return REG_NOMATCH;
- }
- MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
- v->mem[t->retry] = (mid - begin) + 1;
- zapmem(v, t->left);
- zapmem(v, t->right);
- }
-
- /* satisfaction */
- MDEBUG(("successful\n"));
- freedfa(d);
- freedfa(d2);
- return REG_OKAY;
-}
-
-/*
- * cbrdissect - determine backref subexpression matches
- */
-static int /* regexec return code */
-cbrdissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- int i;
- int n = t->subno;
- size_t len;
- chr *paren;
- chr *p;
- chr *stop;
- int min = t->min;
- int max = t->max;
-
- assert(t != NULL);
- assert(t->op == 'b');
- assert(n >= 0);
- assert((size_t) n < v->nmatch);
-
- MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
-
- if (v->pmatch[n].rm_so == -1)
- return REG_NOMATCH;
- paren = v->start + v->pmatch[n].rm_so;
- len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
-
- /* no room to maneuver -- retries are pointless */
- if (v->mem[t->retry])
- return REG_NOMATCH;
- v->mem[t->retry] = 1;
-
- /* special-case zero-length string */
- if (len == 0)
- {
- if (begin == end)
- return REG_OKAY;
- return REG_NOMATCH;
- }
-
- /* and too-short string */
- assert(end >= begin);
- if ((size_t) (end - begin) < len)
- return REG_NOMATCH;
- stop = end - len;
-
- /* count occurrences */
- i = 0;
- for (p = begin; p <= stop && (i < max || max == INFINITY); p += len)
- {
- if ((*v->g->compare) (paren, p, len) != 0)
- break;
- i++;
- }
- MDEBUG(("cbackref found %d\n", i));
-
- /* and sort it out */
- if (p != end) /* didn't consume all of it */
- return REG_NOMATCH;
- if (min <= i && (i <= max || max == INFINITY))
- return REG_OKAY;
- return REG_NOMATCH; /* out of range */
-}
-
-/*
- * caltdissect - determine alternative subexpression matches (w. complications)
- */
-static int /* regexec return code */
-caltdissect(struct vars * v,
- struct subre * t,
- chr *begin, /* beginning of relevant substring */
- chr *end) /* end of same */
-{
- struct dfa *d;
- int er;
-
-#define UNTRIED 0 /* not yet tried at all */
-#define TRYING 1 /* top matched, trying submatches */
-#define TRIED 2 /* top didn't match or submatches
- * exhausted */
-
- if (t == NULL)
- return REG_NOMATCH;
- assert(t->op == '|');
- if (v->mem[t->retry] == TRIED)
- return caltdissect(v, t->right, begin, end);
-
- MDEBUG(("calt n%d\n", t->retry));
- assert(t->left != NULL);
-
- if (v->mem[t->retry] == UNTRIED)
- {
- d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
- if (ISERR())
- return v->err;
- if (longest(v, d, begin, end, (int *) NULL) != end)
- {
- freedfa(d);
- v->mem[t->retry] = TRIED;
- return caltdissect(v, t->right, begin, end);
- }
- freedfa(d);
- MDEBUG(("calt matched\n"));
- v->mem[t->retry] = TRYING;
- }
-
- er = cdissect(v, t->left, begin, end);
- if (er != REG_NOMATCH)
- return er;
-
- v->mem[t->retry] = TRIED;
- return caltdissect(v, t->right, begin, end);
-}
-
-
-
-#include "rege_dfa.c"
diff --git a/src/regex/regfree.c b/src/regex/regfree.c
index 569bd204aa..52d09f6e66 100644
--- a/src/regex/regfree.c
+++ b/src/regex/regfree.c
@@ -1,57 +1,41 @@
-/*
- * regfree - free an RE
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regfree.c,v 1.17 2003/08/04 00:43:21 momjian Exp $
- *
- *
- * You might think that this could be incorporated into regcomp.c, and
- * that would be a reasonable idea... except that this is a generic
- * function (with a generic name), applicable to all compiled REs
- * regardless of the size of their characters, whereas the stuff in
- * regcomp.c gets compiled once per character size.
- */
-
-#include "regguts.h"
+#if defined(__MWERKS__) && !defined(__MACH__)
+typedef long off_t ;
+#else
+#include
+#endif
+#include
+#include
+#include "regex.h"
+#include "utils.h"
+#include "regex2.h"
/*
- * pg_regfree - free an RE (generic function, punts to RE-specific function)
- *
- * Ignoring invocation with NULL is a convenience.
+ - regfree - free everything
+ = extern void regfree(regex_t *);
*/
void
-regfree(regex_t *re)
-{ wx_regfree(re); }
-void
-wx_regfree(regex_t *re)
+regfree(preg)
+regex_t *preg;
{
- if (re == NULL)
+ register struct re_guts *g;
+
+ if (preg->re_magic != MAGIC1) /* oops */
+ return; /* nice to complain, but hard */
+
+ g = preg->re_g;
+ if (g == NULL || g->magic != MAGIC2) /* oops again */
return;
- (*((struct fns *) re->re_fns)->free) (re);
+ preg->re_magic = 0; /* mark it invalid */
+ g->magic = 0; /* mark it invalid */
+
+ if (g->strip != NULL)
+ free((char *)g->strip);
+ if (g->sets != NULL)
+ free((char *)g->sets);
+ if (g->setbits != NULL)
+ free((char *)g->setbits);
+ if (g->must != NULL)
+ free(g->must);
+ free((char *)g);
}
diff --git a/src/regex/regfronts.c b/src/regex/regfronts.c
deleted file mode 100644
index 82f48e2abc..0000000000
--- a/src/regex/regfronts.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * regcomp and regexec - front ends to re_ routines
- *
- * Mostly for implementation of backward-compatibility kludges. Note
- * that these routines exist ONLY in char versions.
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include "regguts.h"
-
-/*
- - regcomp - compile regular expression
- */
-int
-regcomp(re, str, flags)
-regex_t *re;
-CONST char *str;
-int flags;
-{
- size_t len;
- int f = flags;
-
- if (f®_PEND) {
- len = re->re_endp - str;
- f &= ~REG_PEND;
- } else
- len = strlen(str);
-
- return re_comp(re, str, len, f);
-}
-
-/*
- - regexec - execute regular expression
- */
-int
-regexec(re, str, nmatch, pmatch, flags)
-regex_t *re;
-CONST char *str;
-size_t nmatch;
-regmatch_t pmatch[];
-int flags;
-{
- CONST char *start;
- size_t len;
- int f = flags;
-
- if (f®_STARTEND) {
- start = str + pmatch[0].rm_so;
- len = pmatch[0].rm_eo - pmatch[0].rm_so;
- f &= ~REG_STARTEND;
- } else {
- start = str;
- len = strlen(str);
- }
-
- return re_exec(re, start, len, nmatch, pmatch, f);
-}
diff --git a/src/regex/regguts.h b/src/regex/regguts.h
deleted file mode 100644
index aa12dbf445..0000000000
--- a/src/regex/regguts.h
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * Internal interface definitions, etc., for the reg package
- *
- * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
- * Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $Id$
- */
-
-
-
-/*
- * Environmental customization. It should not (I hope) be necessary to
- * alter the file you are now reading -- regcustom.h should handle it all,
- * given care here and elsewhere.
- */
-#include "regcustom.h"
-
-
-
-/*
- * Things that regcustom.h might override.
- */
-
-/* assertions */
-#ifndef assert
-#ifndef REG_DEBUG
-# ifndef NDEBUG
-# define NDEBUG /* no assertions */
-# endif
-#endif
-#include
-#endif
-
-/* voids */
-#ifndef DISCARD
-#define DISCARD void /* for throwing values away */
-#endif
-#ifndef VS
-#define VS(x) ((void *)(x)) /* cast something to generic ptr */
-#endif
-
-/* function-pointer declarator */
-#ifndef FUNCPTR
-#define FUNCPTR(name, args) (*name) args
-#endif
-
-/* memory allocation */
-#ifndef MALLOC
-#define MALLOC(n) malloc(n)
-#endif
-#ifndef REALLOC
-#define REALLOC(p, n) realloc(VS(p), n)
-#endif
-#ifndef FREE
-#define FREE(p) free(VS(p))
-#endif
-
-/* want size of a char in bits, and max value in bounded quantifiers */
-#ifndef CHAR_BIT
-#include
-#endif
-#ifndef _POSIX2_RE_DUP_MAX
-#define _POSIX2_RE_DUP_MAX 255 /* normally from */
-#endif
-
-
-
-/*
- * misc
- */
-
-#define NOTREACHED 0
-#define xxx 1
-
-#define DUPMAX _POSIX2_RE_DUP_MAX
-#define INFINITY (DUPMAX+1)
-
-#define REMAGIC 0xfed7 /* magic number for main struct */
-
-
-
-/*
- * debugging facilities
- */
-#ifdef REG_DEBUG
-/* FDEBUG does finite-state tracing */
-#define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; }
-/* MDEBUG does higher-level tracing */
-#define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; }
-#else
-#define FDEBUG(arglist) {}
-#define MDEBUG(arglist) {}
-#endif
-
-
-
-/*
- * bitmap manipulation
- */
-#define UBITS (CHAR_BIT * sizeof(unsigned))
-#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
-#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
-
-
-
-/*
- * We dissect a chr into byts for colormap table indexing. Here we define
- * a byt, which will be the same as a byte on most machines... The exact
- * size of a byt is not critical, but about 8 bits is good, and extraction
- * of 8-bit chunks is sometimes especially fast.
- */
-#ifndef BYTBITS
-#define BYTBITS 8 /* bits in a byt */
-#endif
-#define BYTTAB (1<flags&FREECOL)
- union tree *block; /* block of solid color, if any */
-};
-
-/* the color map itself */
-struct colormap
-{
- int magic;
-#define CMMAGIC 0x876
- struct vars *v; /* for compile error reporting */
- size_t ncds; /* number of colordescs */
- size_t max; /* highest in use */
- color free; /* beginning of free chain (if non-0) */
- struct colordesc *cd;
-#define CDEND(cm) (&(cm)->cd[(cm)->max + 1])
-#define NINLINECDS ((size_t)10)
- struct colordesc cdspace[NINLINECDS];
- union tree tree[NBYTS]; /* tree top, plus fill blocks */
-};
-
-/* optimization magic to do fast chr->color mapping */
-#define B0(c) ((c) & BYTMASK)
-#define B1(c) (((c)>>BYTBITS) & BYTMASK)
-#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK)
-#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK)
-#if NBYTS == 1
-#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
-#endif
-/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
-#if NBYTS == 2
-#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
-#endif
-#if NBYTS == 4
-#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
-#endif
-
-
-
-/*
- * Interface definitions for locale-interface functions in locale.c.
- * Multi-character collating elements (MCCEs) cause most of the trouble.
- */
-struct cvec
-{
- int nchrs; /* number of chrs */
- int chrspace; /* number of chrs possible */
- chr *chrs; /* pointer to vector of chrs */
- int nranges; /* number of ranges (chr pairs) */
- int rangespace; /* number of chrs possible */
- chr *ranges; /* pointer to vector of chr pairs */
- int nmcces; /* number of MCCEs */
- int mccespace; /* number of MCCEs possible */
- int nmccechrs; /* number of chrs used for MCCEs */
- chr *mcces[1]; /* pointers to 0-terminated MCCEs */
- /* and both batches of chrs are on the end */
-};
-
-/* caution: this value cannot be changed easily */
-#define MAXMCCE 2 /* length of longest MCCE */
-
-
-
-/*
- * definitions for NFA internal representation
- *
- * Having a "from" pointer within each arc may seem redundant, but it
- * saves a lot of hassle.
- */
-struct state;
-
-struct arc
-{
- int type;
-#define ARCFREE '\0'
- color co;
- struct state *from; /* where it's from (and contained within) */
- struct state *to; /* where it's to */
- struct arc *outchain; /* *from's outs chain or free chain */
-#define freechain outchain
- struct arc *inchain; /* *to's ins chain */
- struct arc *colorchain; /* color's arc chain */
-};
-
-struct arcbatch
-{ /* for bulk allocation of arcs */
- struct arcbatch *next;
-#define ABSIZE 10
- struct arc a[ABSIZE];
-};
-
-struct state
-{
- int no;
-#define FREESTATE (-1)
- char flag; /* marks special states */
- int nins; /* number of inarcs */
- struct arc *ins; /* chain of inarcs */
- int nouts; /* number of outarcs */
- struct arc *outs; /* chain of outarcs */
- struct arc *free; /* chain of free arcs */
- struct state *tmp; /* temporary for traversal algorithms */
- struct state *next; /* chain for traversing all */
- struct state *prev; /* back chain */
- struct arcbatch oas; /* first arcbatch, avoid malloc in easy
- * case */
- int noas; /* number of arcs used in first arcbatch */
-};
-
-struct nfa
-{
- struct state *pre; /* pre-initial state */
- struct state *init; /* initial state */
- struct state *final; /* final state */
- struct state *post; /* post-final state */
- int nstates; /* for numbering states */
- struct state *states; /* state-chain header */
- struct state *slast; /* tail of the chain */
- struct state *free; /* free list */
- struct colormap *cm; /* the color map */
- color bos[2]; /* colors, if any, assigned to BOS and BOL */
- color eos[2]; /* colors, if any, assigned to EOS and EOL */
- struct vars *v; /* simplifies compile error reporting */
- struct nfa *parent; /* parent NFA, if any */
-};
-
-
-
-/*
- * definitions for compacted NFA
- */
-struct carc
-{
- color co; /* COLORLESS is list terminator */
- int to; /* state number */
-};
-
-struct cnfa
-{
- int nstates; /* number of states */
- int ncolors; /* number of colors */
- int flags;
-#define HASLACONS 01 /* uses lookahead constraints */
- int pre; /* setup state number */
- int post; /* teardown state number */
- color bos[2]; /* colors, if any, assigned to BOS and BOL */
- color eos[2]; /* colors, if any, assigned to EOS and EOL */
- struct carc **states; /* vector of pointers to outarc lists */
- struct carc *arcs; /* the area for the lists */
-};
-
-#define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
-#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
-
-
-
-/*
- * subexpression tree
- */
-struct subre
-{
- char op; /* '|', '.' (concat), 'b' (backref), '(',
- * '=' */
- char flags;
-#define LONGER 01 /* prefers longer match */
-#define SHORTER 02 /* prefers shorter match */
-#define MIXED 04 /* mixed preference below */
-#define CAP 010 /* capturing parens below */
-#define BACKR 020 /* back reference below */
-#define INUSE 0100 /* in use in final tree */
-#define LOCAL 03 /* bits which may not propagate up */
-#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
-#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
-#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
-#define MESSY(f) ((f)&(MIXED|CAP|BACKR))
-#define PREF(f) ((f)&LOCAL)
-#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
-#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
- short retry; /* index into retry memory */
- int subno; /* subexpression number (for 'b' and '(') */
- short min; /* min repetitions, for backref only */
- short max; /* max repetitions, for backref only */
- struct subre *left; /* left child, if any (also freelist
- * chain) */
- struct subre *right; /* right child, if any */
- struct state *begin; /* outarcs from here... */
- struct state *end; /* ...ending in inarcs here */
- struct cnfa cnfa; /* compacted NFA, if any */
- struct subre *chain; /* for bookkeeping and error cleanup */
-};
-
-
-
-/*
- * table of function pointers for generic manipulation functions
- * A regex_t's re_fns points to one of these.
- */
-struct fns
-{
- void FUNCPTR(free, (regex_t *));
-};
-
-
-
-/*
- * the insides of a regex_t, hidden behind a void *
- */
-struct guts
-{
- int magic;
-#define GUTSMAGIC 0xfed9
- int cflags; /* copy of compile flags */
- long info; /* copy of re_info */
- size_t nsub; /* copy of re_nsub */
- struct subre *tree;
- struct cnfa search; /* for fast preliminary search */
- int ntree;
- struct colormap cmap;
- int FUNCPTR(compare, (const chr *, const chr *, size_t));
- struct subre *lacons; /* lookahead-constraint vector */
- int nlacons; /* size of lacons */
-};
diff --git a/src/regex/split.c b/src/regex/split.c
new file mode 100644
index 0000000000..188bdb775b
--- /dev/null
+++ b/src/regex/split.c
@@ -0,0 +1,316 @@
+#include
+#include
+
+/*
+ - split - divide a string into fields, like awk split()
+ = int split(char *string, char *fields[], int nfields, char *sep);
+ */
+int /* number of fields, including overflow */
+split(string, fields, nfields, sep)
+char *string;
+char *fields[]; /* list is not NULL-terminated */
+int nfields; /* number of entries available in fields[] */
+char *sep; /* "" white, "c" single char, "ab" [ab]+ */
+{
+ register char *p = string;
+ register char c; /* latest character */
+ register char sepc = sep[0];
+ register char sepc2;
+ register int fn;
+ register char **fp = fields;
+ register char *sepp;
+ register int trimtrail;
+
+ /* white space */
+ if (sepc == '\0') {
+ while ((c = *p++) == ' ' || c == '\t')
+ continue;
+ p--;
+ trimtrail = 1;
+ sep = " \t"; /* note, code below knows this is 2 long */
+ sepc = ' ';
+ } else
+ trimtrail = 0;
+ sepc2 = sep[1]; /* now we can safely pick this up */
+
+ /* catch empties */
+ if (*p == '\0')
+ return(0);
+
+ /* single separator */
+ if (sepc2 == '\0') {
+ fn = nfields;
+ for (;;) {
+ *fp++ = p;
+ fn--;
+ if (fn == 0)
+ break;
+ while ((c = *p++) != sepc)
+ if (c == '\0')
+ return(nfields - fn);
+ *(p-1) = '\0';
+ }
+ /* we have overflowed the fields vector -- just count them */
+ fn = nfields;
+ for (;;) {
+ while ((c = *p++) != sepc)
+ if (c == '\0')
+ return(fn);
+ fn++;
+ }
+ /* not reached */
+ }
+
+ /* two separators */
+ if (sep[2] == '\0') {
+ fn = nfields;
+ for (;;) {
+ *fp++ = p;
+ fn--;
+ while ((c = *p++) != sepc && c != sepc2)
+ if (c == '\0') {
+ if (trimtrail && **(fp-1) == '\0')
+ fn++;
+ return(nfields - fn);
+ }
+ if (fn == 0)
+ break;
+ *(p-1) = '\0';
+ while ((c = *p++) == sepc || c == sepc2)
+ continue;
+ p--;
+ }
+ /* we have overflowed the fields vector -- just count them */
+ fn = nfields;
+ while (c != '\0') {
+ while ((c = *p++) == sepc || c == sepc2)
+ continue;
+ p--;
+ fn++;
+ while ((c = *p++) != '\0' && c != sepc && c != sepc2)
+ continue;
+ }
+ /* might have to trim trailing white space */
+ if (trimtrail) {
+ p--;
+ while ((c = *--p) == sepc || c == sepc2)
+ continue;
+ p++;
+ if (*p != '\0') {
+ if (fn == nfields+1)
+ *p = '\0';
+ fn--;
+ }
+ }
+ return(fn);
+ }
+
+ /* n separators */
+ fn = 0;
+ for (;;) {
+ if (fn < nfields)
+ *fp++ = p;
+ fn++;
+ for (;;) {
+ c = *p++;
+ if (c == '\0')
+ return(fn);
+ sepp = sep;
+ while ((sepc = *sepp++) != '\0' && sepc != c)
+ continue;
+ if (sepc != '\0') /* it was a separator */
+ break;
+ }
+ if (fn < nfields)
+ *(p-1) = '\0';
+ for (;;) {
+ c = *p++;
+ sepp = sep;
+ while ((sepc = *sepp++) != '\0' && sepc != c)
+ continue;
+ if (sepc == '\0') /* it wasn't a separator */
+ break;
+ }
+ p--;
+ }
+
+ /* not reached */
+}
+
+#ifdef TEST_SPLIT
+
+
+/*
+ * test program
+ * pgm runs regression
+ * pgm sep splits stdin lines by sep
+ * pgm str sep splits str by sep
+ * pgm str sep n splits str by sep n times
+ */
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+ char buf[512];
+ register int n;
+# define MNF 10
+ char *fields[MNF];
+
+ if (argc > 4)
+ for (n = atoi(argv[3]); n > 0; n--) {
+ (void) strcpy(buf, argv[1]);
+ }
+ else if (argc > 3)
+ for (n = atoi(argv[3]); n > 0; n--) {
+ (void) strcpy(buf, argv[1]);
+ (void) split(buf, fields, MNF, argv[2]);
+ }
+ else if (argc > 2)
+ dosplit(argv[1], argv[2]);
+ else if (argc > 1)
+ while (fgets(buf, sizeof(buf), stdin) != NULL) {
+ buf[strlen(buf)-1] = '\0'; /* stomp newline */
+ dosplit(buf, argv[1]);
+ }
+ else
+ regress();
+
+ exit(0);
+}
+
+dosplit(string, seps)
+char *string;
+char *seps;
+{
+# define NF 5
+ char *fields[NF];
+ register int nf;
+
+ nf = split(string, fields, NF, seps);
+ print(nf, NF, fields);
+}
+
+print(nf, nfp, fields)
+int nf;
+int nfp;
+char *fields[];
+{
+ register int fn;
+ register int bound;
+
+ bound = (nf > nfp) ? nfp : nf;
+ printf("%d:\t", nf);
+ for (fn = 0; fn < bound; fn++)
+ printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
+}
+
+#define RNF 5 /* some table entries know this */
+struct {
+ char *str;
+ char *seps;
+ int nf;
+ char *fi[RNF];
+} tests[] = {
+ "", " ", 0, { "" },
+ " ", " ", 2, { "", "" },
+ "x", " ", 1, { "x" },
+ "xy", " ", 1, { "xy" },
+ "x y", " ", 2, { "x", "y" },
+ "abc def g ", " ", 5, { "abc", "def", "", "g", "" },
+ " a bcd", " ", 4, { "", "", "a", "bcd" },
+ "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" },
+ " a b c d ", " ", 6, { "", "a", "b", "c", "d " },
+
+ "", " _", 0, { "" },
+ " ", " _", 2, { "", "" },
+ "x", " _", 1, { "x" },
+ "x y", " _", 2, { "x", "y" },
+ "ab _ cd", " _", 2, { "ab", "cd" },
+ " a_b c ", " _", 5, { "", "a", "b", "c", "" },
+ "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" },
+ " a b c d ", " _", 6, { "", "a", "b", "c", "d " },
+
+ "", " _~", 0, { "" },
+ " ", " _~", 2, { "", "" },
+ "x", " _~", 1, { "x" },
+ "x y", " _~", 2, { "x", "y" },
+ "ab _~ cd", " _~", 2, { "ab", "cd" },
+ " a_b c~", " _~", 5, { "", "a", "b", "c", "" },
+ "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" },
+ "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " },
+
+ "", " _~-", 0, { "" },
+ " ", " _~-", 2, { "", "" },
+ "x", " _~-", 1, { "x" },
+ "x y", " _~-", 2, { "x", "y" },
+ "ab _~- cd", " _~-", 2, { "ab", "cd" },
+ " a_b c~", " _~-", 5, { "", "a", "b", "c", "" },
+ "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" },
+ "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " },
+
+ "", " ", 0, { "" },
+ " ", " ", 2, { "", "" },
+ "x", " ", 1, { "x" },
+ "xy", " ", 1, { "xy" },
+ "x y", " ", 2, { "x", "y" },
+ "abc def g ", " ", 4, { "abc", "def", "g", "" },
+ " a bcd", " ", 3, { "", "a", "bcd" },
+ "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" },
+ " a b c d ", " ", 6, { "", "a", "b", "c", "d " },
+
+ "", "", 0, { "" },
+ " ", "", 0, { "" },
+ "x", "", 1, { "x" },
+ "xy", "", 1, { "xy" },
+ "x y", "", 2, { "x", "y" },
+ "abc def g ", "", 3, { "abc", "def", "g" },
+ "\t a bcd", "", 2, { "a", "bcd" },
+ " a \tb\t c ", "", 3, { "a", "b", "c" },
+ "a b c d e ", "", 5, { "a", "b", "c", "d", "e" },
+ "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" },
+ " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " },
+
+ NULL, NULL, 0, { NULL },
+};
+
+regress()
+{
+ char buf[512];
+ register int n;
+ char *fields[RNF+1];
+ register int nf;
+ register int i;
+ register int printit;
+ register char *f;
+
+ for (n = 0; tests[n].str != NULL; n++) {
+ (void) strcpy(buf, tests[n].str);
+ fields[RNF] = NULL;
+ nf = split(buf, fields, RNF, tests[n].seps);
+ printit = 0;
+ if (nf != tests[n].nf) {
+ printf("split `%s' by `%s' gave %d fields, not %d\n",
+ tests[n].str, tests[n].seps, nf, tests[n].nf);
+ printit = 1;
+ } else if (fields[RNF] != NULL) {
+ printf("split() went beyond array end\n");
+ printit = 1;
+ } else {
+ for (i = 0; i < nf && i < RNF; i++) {
+ f = fields[i];
+ if (f == NULL)
+ f = "(NULL)";
+ if (strcmp(f, tests[n].fi[i]) != 0) {
+ printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
+ tests[n].str, tests[n].seps,
+ i, fields[i], tests[n].fi[i]);
+ printit = 1;
+ }
+ }
+ }
+ if (printit)
+ print(nf, RNF, fields);
+ }
+}
+#endif
diff --git a/src/regex/tclUniData.c b/src/regex/tclUniData.c
deleted file mode 100644
index 9f0c6e05ae..0000000000
--- a/src/regex/tclUniData.c
+++ /dev/null
@@ -1,904 +0,0 @@
-/*
- * tclUniData.c --
- *
- * Declarations of Unicode character information tables. This file is
- * automatically generated by the tools/uniParse.tcl script. Do not
- * modify this file by hand.
- *
- * Copyright (c) 1998 by Scriptics Corporation.
- * All rights reserved.
- *
- * RCS: @(#) $Id$
- */
-
-/*
- * A 16-bit Unicode character is split into two parts in order to index
- * into the following tables. The lower OFFSET_BITS comprise an offset
- * into a page of characters. The upper bits comprise the page number.
- */
-
-#define OFFSET_BITS 5
-
-/*
- * The pageMap is indexed by page number and returns an alternate page number
- * that identifies a unique page of characters. Many Unicode characters map
- * to the same alternate page number.
- */
-
-static unsigned char pageMap[] = {
- 0, 1, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 7, 15, 16, 17,
- 18, 19, 20, 21, 22, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 7, 32,
- 7, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 47,
- 48, 49, 50, 51, 52, 35, 47, 53, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
- 58, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 80, 81,
- 84, 85, 80, 86, 87, 88, 89, 90, 91, 92, 35, 93, 94, 95, 35, 96, 97,
- 98, 99, 100, 101, 102, 35, 47, 103, 104, 35, 35, 105, 106, 107, 47,
- 47, 108, 47, 47, 109, 47, 110, 111, 47, 112, 47, 113, 114, 115, 116,
- 114, 47, 117, 118, 35, 47, 47, 119, 90, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 120, 121, 47, 47, 122,
- 35, 35, 35, 35, 47, 123, 124, 125, 126, 47, 127, 128, 47, 129, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 7, 7, 7, 7, 130, 7, 7, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
- 149, 150, 151, 152, 153, 154, 155, 156, 156, 156, 156, 156, 156, 156,
- 157, 158, 159, 160, 161, 162, 35, 35, 35, 160, 163, 164, 165, 166,
- 167, 168, 169, 160, 160, 160, 160, 170, 171, 172, 173, 174, 160, 160,
- 175, 35, 35, 35, 35, 176, 177, 178, 179, 180, 181, 35, 35, 160, 160,
- 160, 160, 160, 160, 160, 160, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 182, 160, 160, 155, 160, 160, 160, 160, 160, 160, 170, 183, 184, 185,
- 90, 47, 186, 90, 47, 187, 188, 189, 47, 47, 190, 128, 35, 35, 191,
- 192, 193, 194, 192, 195, 196, 197, 160, 160, 160, 198, 160, 160, 199,
- 197, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 200, 35, 35, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 201, 35, 35, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 202, 203, 204, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 205, 35, 35, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
- 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
- 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
- 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
- 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 47, 47, 47, 47, 47, 47, 47, 47, 47, 208, 35, 35, 35, 35,
- 35, 35, 209, 210, 211, 47, 47, 212, 213, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 214, 215, 47, 216, 47, 217, 218, 35, 219, 220, 221, 47,
- 47, 47, 222, 223, 2, 224, 225, 226, 227, 228, 229, 230, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 231, 35, 232, 233,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 208, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 47, 234, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 235, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207,
- 207, 207, 207, 236, 207, 207, 207, 207, 207, 207, 207, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35
-};
-
-/*
- * The groupMap is indexed by combining the alternate page number with
- * the page offset and returns a group number that identifies a unique
- * set of character attributes.
- */
-
-static unsigned char groupMap[] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8,
- 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 3, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 5, 3, 6, 11, 12, 11, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 5, 7, 6, 7, 1, 2, 3, 4, 4, 4, 4, 14, 14, 11, 14, 15, 16,
- 7, 8, 14, 11, 14, 7, 17, 17, 11, 18, 14, 3, 11, 17, 15, 19, 17, 17,
- 17, 3, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 7, 10, 10, 10, 10, 10, 10, 10, 15,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 7, 13, 13, 13, 13, 13, 13, 13, 20, 21, 22,
- 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21,
- 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22,
- 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 23, 24, 21, 22, 21,
- 22, 21, 22, 15, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21,
- 22, 21, 22, 15, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21,
- 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22,
- 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 25,
- 21, 22, 21, 22, 21, 22, 26, 15, 27, 21, 22, 21, 22, 28, 21, 22, 29,
- 29, 21, 22, 15, 30, 31, 32, 21, 22, 29, 33, 34, 35, 36, 21, 22, 15,
- 15, 35, 37, 15, 38, 21, 22, 21, 22, 21, 22, 39, 21, 22, 39, 15, 15,
- 21, 22, 39, 21, 22, 40, 40, 21, 22, 21, 22, 41, 21, 22, 15, 42, 21,
- 22, 15, 43, 42, 42, 42, 42, 44, 45, 46, 44, 45, 46, 44, 45, 46, 21,
- 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 47, 21,
- 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22,
- 15, 44, 45, 46, 21, 22, 48, 49, 21, 22, 21, 22, 21, 22, 21, 22, 0,
- 0, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22,
- 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 50, 51, 15, 52, 52, 15, 53, 15,
- 54, 15, 15, 15, 15, 52, 15, 15, 55, 15, 15, 15, 15, 56, 57, 15, 15,
- 15, 15, 15, 57, 15, 15, 58, 15, 15, 59, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 60, 15, 15, 60, 15, 15, 15, 15, 60, 15, 61, 61, 15, 15,
- 15, 15, 15, 15, 62, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 63,
- 63, 63, 63, 63, 63, 63, 63, 63, 11, 11, 63, 63, 63, 63, 63, 63, 63,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 63, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 63, 63, 63,
- 63, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64,
- 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11,
- 0, 0, 0, 0, 63, 0, 0, 0, 3, 0, 0, 0, 0, 0, 11, 11, 66, 3, 67, 67, 67,
- 0, 68, 0, 69, 69, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 70, 71,
- 71, 71, 15, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 72, 13, 13, 13, 13, 13, 13, 13, 13, 13, 73, 74, 74, 0,
- 75, 76, 77, 77, 77, 78, 79, 15, 0, 0, 21, 22, 21, 22, 21, 22, 21, 22,
- 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 80, 81, 47,
- 15, 82, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84, 84, 84, 84, 84, 84, 84,
- 84, 84, 84, 84, 84, 84, 84, 84, 84, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81,
- 81, 81, 81, 81, 21, 22, 14, 64, 64, 64, 64, 0, 85, 85, 0, 0, 21, 22,
- 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21,
- 22, 77, 21, 22, 21, 22, 0, 0, 21, 22, 0, 0, 21, 22, 0, 0, 0, 21, 22,
- 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21,
- 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22,
- 21, 22, 0, 0, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, 86, 86, 86,
- 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
- 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
- 0, 0, 63, 3, 3, 3, 3, 3, 3, 0, 87, 87, 87, 87, 87, 87, 87, 87, 87,
- 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,
- 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 15, 0, 3, 8, 0, 0,
- 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 3, 64, 3, 64,
- 64, 3, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 0, 0, 0, 0, 0, 42, 42, 42, 3, 3, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 0, 0, 0, 0, 0, 63, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 0, 0, 64, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 42, 64,
- 64, 64, 64, 64, 64, 64, 85, 85, 64, 64, 64, 64, 64, 64, 63, 63, 64,
- 64, 14, 64, 64, 64, 64, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 42, 42,
- 42, 14, 14, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 88, 42,
- 64, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64,
- 64, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 0, 0, 64, 42, 89, 89, 89, 64, 64, 64, 64, 64, 64,
- 64, 64, 89, 89, 89, 89, 64, 0, 0, 42, 64, 64, 64, 64, 0, 0, 0, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 64, 64, 3, 3, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64,
- 89, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 0, 0, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42, 0, 0, 0, 42,
- 42, 42, 42, 0, 0, 64, 0, 89, 89, 89, 64, 64, 64, 64, 0, 0, 89, 89,
- 0, 0, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 42, 42,
- 0, 42, 42, 42, 64, 64, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 42, 42,
- 4, 4, 17, 17, 17, 17, 17, 17, 14, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 42,
- 42, 42, 42, 42, 42, 0, 0, 0, 0, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0,
- 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 0, 42, 42, 0, 42, 42, 0, 0,
- 64, 0, 89, 89, 89, 64, 64, 0, 0, 0, 0, 64, 64, 0, 0, 64, 64, 64, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 0, 42, 0, 0, 0, 0, 0,
- 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 64, 64, 42, 42, 42, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 89, 0, 42, 42, 42, 42, 42, 42, 42,
- 0, 42, 0, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42,
- 42, 42, 0, 42, 42, 0, 42, 42, 42, 42, 42, 0, 0, 64, 42, 89, 89, 89,
- 64, 64, 64, 64, 64, 0, 64, 64, 89, 0, 89, 89, 64, 0, 0, 42, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42,
- 42, 0, 0, 42, 42, 42, 42, 0, 0, 64, 42, 89, 64, 89, 64, 64, 64, 0,
- 0, 0, 89, 89, 0, 0, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 64, 89, 0,
- 0, 0, 0, 42, 42, 0, 42, 42, 42, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 89,
- 0, 42, 42, 42, 42, 42, 42, 0, 0, 0, 42, 42, 42, 0, 42, 42, 42, 42,
- 0, 0, 0, 42, 42, 0, 42, 0, 42, 42, 0, 0, 0, 42, 42, 0, 0, 0, 42, 42,
- 42, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 0, 0, 0,
- 0, 89, 89, 64, 89, 89, 0, 0, 0, 89, 89, 89, 0, 89, 89, 89, 64, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 89, 89, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42,
- 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 0, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 89, 89,
- 89, 89, 0, 64, 64, 64, 0, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 64,
- 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89,
- 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 0, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42,
- 42, 42, 0, 0, 0, 0, 89, 64, 89, 89, 89, 89, 89, 0, 64, 89, 89, 0, 89,
- 89, 64, 64, 0, 0, 0, 0, 0, 0, 0, 89, 89, 0, 0, 0, 0, 0, 0, 0, 42, 0,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 89, 89, 89, 64, 64,
- 64, 0, 0, 89, 89, 89, 0, 89, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 0, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 0, 0,
- 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 64, 0, 0, 0, 0, 89, 89, 89, 64,
- 64, 64, 0, 64, 0, 89, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 3, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 64, 42, 42, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 4, 42, 42,
- 42, 42, 42, 42, 63, 64, 64, 64, 64, 64, 64, 64, 64, 3, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 3, 3, 0, 0, 0, 0, 0, 42, 42, 0, 42, 0, 0, 42, 42,
- 0, 42, 0, 0, 42, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 0, 42, 42, 42, 42,
- 42, 42, 42, 0, 42, 42, 42, 0, 42, 0, 42, 0, 0, 42, 42, 0, 42, 42, 42,
- 42, 64, 42, 42, 64, 64, 64, 64, 64, 64, 0, 64, 64, 42, 0, 0, 42, 42,
- 42, 42, 42, 0, 63, 0, 64, 64, 64, 64, 64, 64, 0, 0, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 0, 0, 42, 42, 0, 0, 42, 14, 14, 14, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 14, 14, 14, 14, 14, 64, 64, 14, 14, 14,
- 14, 14, 14, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 14, 64, 14, 64, 14, 64, 5, 6, 5, 6, 89, 89, 42, 42, 42,
- 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 89, 64, 64, 64, 64, 64, 3, 64, 64, 42,
- 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 0, 14, 14, 14, 14, 14, 14, 14, 14, 64, 14, 14, 14, 14, 14, 14, 0, 0,
- 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 42,
- 42, 42, 42, 42, 0, 42, 42, 0, 89, 64, 64, 64, 64, 89, 64, 0, 0, 0,
- 64, 64, 89, 64, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3,
- 3, 3, 3, 3, 3, 42, 42, 42, 42, 42, 42, 89, 89, 64, 64, 0, 0, 0, 0,
- 0, 0, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
- 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
- 77, 77, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 0, 0, 0, 0, 3, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0,
- 0, 0, 0, 0, 42, 42, 42, 42, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 42, 42, 42,
- 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42,
- 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42,
- 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42,
- 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 0,
- 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3,
- 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 3, 42, 42, 42, 42, 42,
- 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 5, 6, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 3, 3, 3, 90, 90, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 89, 89, 89, 64, 64, 64, 64, 64, 64, 64, 89, 89, 89, 89, 89,
- 89, 89, 89, 64, 89, 89, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 3, 3, 3, 3, 3, 3, 3, 4, 3, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3,
- 3, 3, 3, 3, 8, 3, 3, 3, 3, 88, 88, 88, 88, 0, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 0, 0, 0, 0, 0, 0, 42, 42, 42, 63, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0,
- 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 64, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 22, 21, 22, 21, 22, 21,
- 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 15, 15,
- 15, 15, 15, 91, 0, 0, 0, 0, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22,
- 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 0,
- 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93,
- 93, 93, 93, 92, 92, 92, 92, 92, 92, 0, 0, 93, 93, 93, 93, 93, 93, 0,
- 0, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93,
- 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 92,
- 92, 92, 92, 92, 92, 0, 0, 93, 93, 93, 93, 93, 93, 0, 0, 15, 92, 15,
- 92, 15, 92, 15, 92, 0, 93, 0, 93, 0, 93, 0, 93, 92, 92, 92, 92, 92,
- 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 94, 94, 95, 95, 95, 95,
- 96, 96, 97, 97, 98, 98, 99, 99, 0, 0, 92, 92, 92, 92, 92, 92, 92, 92,
- 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 92, 92, 92, 92, 92,
- 92, 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 92, 92, 92, 92,
- 92, 92, 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 15, 101, 15,
- 0, 15, 15, 93, 93, 102, 102, 103, 11, 104, 11, 11, 11, 15, 101, 15,
- 0, 15, 15, 105, 105, 105, 105, 103, 11, 11, 11, 92, 92, 15, 15, 0,
- 0, 15, 15, 93, 93, 106, 106, 0, 11, 11, 11, 92, 92, 15, 15, 15, 107,
- 15, 15, 93, 93, 108, 108, 109, 11, 11, 11, 0, 0, 15, 101, 15, 0, 15,
- 15, 110, 110, 111, 111, 103, 11, 11, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 88, 88, 88, 88, 8, 8, 8, 8, 8, 8, 3, 3, 16, 19, 5, 16, 16,
- 19, 5, 16, 3, 3, 3, 3, 3, 3, 3, 3, 112, 113, 88, 88, 88, 88, 88, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 16, 19, 3, 3, 3, 3, 12, 12, 3, 3, 3, 7,
- 5, 6, 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 88, 88, 88, 88, 88, 17,
- 0, 0, 0, 17, 17, 17, 17, 17, 17, 7, 7, 7, 5, 6, 15, 17, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 7, 7, 7, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 85, 85, 85, 85, 64, 85, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 77,
- 14, 14, 14, 14, 77, 14, 14, 15, 77, 77, 77, 15, 15, 77, 77, 77, 15,
- 14, 77, 14, 14, 14, 77, 77, 77, 77, 77, 14, 14, 14, 14, 14, 14, 77,
- 14, 114, 14, 77, 14, 115, 116, 77, 77, 14, 15, 77, 77, 14, 77, 15,
- 42, 42, 42, 42, 15, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117,
- 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 118, 118, 118, 118,
- 118, 118, 118, 118, 118, 118, 118, 90, 90, 90, 90, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 14, 14, 14, 14, 14, 7, 7, 14, 14,
- 14, 14, 7, 14, 14, 7, 14, 14, 7, 14, 14, 14, 14, 14, 14, 14, 7, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 7, 7, 14, 14, 7,
- 14, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 7, 7, 7, 7, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 7, 7, 14, 14, 14, 14, 14, 14, 14, 5, 6, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 119, 119, 119, 119, 119, 119,
- 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
- 119, 119, 119, 119, 119, 119, 120, 120, 120, 120, 120, 120, 120, 120,
- 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120,
- 120, 120, 120, 120, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 7, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 7, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14,
- 14, 14, 14, 0, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 0, 14, 0, 14, 14, 14, 14, 0, 0, 0, 14, 0, 14, 14,
- 14, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0,
- 0, 0, 0, 2, 3, 3, 3, 14, 63, 42, 90, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6,
- 14, 14, 5, 6, 5, 6, 5, 6, 5, 6, 8, 5, 6, 6, 14, 90, 90, 90, 90, 90,
- 90, 90, 90, 90, 64, 64, 64, 64, 64, 64, 8, 63, 63, 63, 63, 63, 14,
- 14, 90, 90, 90, 0, 0, 0, 14, 14, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64,
- 11, 11, 63, 63, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 12, 63,
- 63, 63, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 14, 14, 17, 17, 17,
- 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 0, 14, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 121, 121, 121, 121, 121, 121,
- 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
- 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 122, 122, 122,
- 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
- 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
- 122, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15,
- 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 0,
- 0, 0, 0, 0, 42, 64, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 7, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42,
- 42, 0, 42, 0, 42, 42, 0, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 64,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, 8, 12, 12, 5, 6, 5, 6, 5,
- 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 0, 0, 0, 0, 3, 3, 3, 3, 12, 12, 12,
- 3, 3, 3, 0, 3, 3, 3, 3, 8, 5, 6, 5, 6, 5, 6, 3, 3, 3, 7, 8, 7, 7, 7,
- 0, 3, 4, 3, 3, 0, 0, 0, 0, 42, 42, 42, 0, 42, 0, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 0, 0, 88, 0, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8, 3, 3, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 11, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 5, 7, 6, 7, 0, 0, 3, 5, 6, 3, 12, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 63,
- 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
- 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42,
- 42, 42, 42, 42, 0, 0, 42, 42, 42, 0, 0, 0, 4, 4, 7, 11, 14, 4, 4, 0,
- 14, 7, 7, 7, 7, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 88, 88, 14,
- 14, 42, 17, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 123, 123,
- 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 89, 64, 14, 14, 14,
- 14, 14, 0, 0, 77, 77, 15, 15, 77, 15, 15, 77, 77, 15, 77, 77, 15, 77,
- 77, 15, 15, 77, 15, 15, 77, 77, 15, 77, 77, 15, 77, 77, 15, 15, 77,
- 15, 15, 77, 77, 15, 77, 77, 15, 77, 77, 15, 15, 77, 77, 15, 15, 77,
- 15, 15, 77, 77, 15, 15, 77, 15, 15, 77, 77, 15, 15, 9, 9, 9, 42, 42,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 88, 0, 88, 88, 88, 88, 88, 88, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 122,
- 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
- 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
- 122
-};
-
-/*
- * Each group represents a unique set of character attributes. The attributes
- * are encoded into a 32-bit value as follows:
- *
- * Bits 0-4 Character category: see the constants listed below.
- *
- * Bits 5-7 Case delta type: 000 = identity
- * 010 = add delta for lower
- * 011 = add delta for lower, add 1 for title
- * 100 = sutract delta for title/upper
- * 101 = sub delta for upper, sub 1 for title
- * 110 = sub delta for upper, add delta for lower
- *
- * Bits 8-21 Reserved for future use.
- *
- * Bits 22-31 Case delta: delta for case conversions. This should be the
- * highest field so we can easily sign extend.
- */
-
-static int groups[] = {
- 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 134217793, 28, 19, 134217858,
- 29, 2, 23, 11, 1178599554, 24, -507510654, 4194369, 4194434, -834666431,
- 973078658, -507510719, 1258291330, 880803905, 864026689, 859832385,
- 331350081, 847249473, 851443777, 868220993, -406847358, 884998209,
- 876609601, 893386817, 897581121, 914358337, 910164033, 918552641,
- 5, -234880894, 8388705, 4194499, 8388770, 331350146, -406847423,
- -234880959, 880803970, 864026754, 859832450, 847249538, 851443842,
- 868221058, 876609666, 884998274, 893386882, 897581186, 914358402,
- 910164098, 918552706, 4, 6, -352321402, 159383617, 155189313,
- 268435521, 264241217, 159383682, 155189378, 130023554, 268435586,
- 264241282, 260046978, 239075458, 1, 197132418, 226492546, 360710274,
- 335544450, -251658175, 402653314, 335544385, 7, 201326657, 201326722,
- 16, 8, 10, 247464066, -33554302, -33554367, -310378366, -360710014,
- -419430270, -536870782, -469761918, -528482174, -33554365, -37748606,
- -310378431, -37748669, 155189378, -360710079, -419430335, -29359998,
- -469761983, -29360063, -536870847, -528482239, 13, 14, -1463812031,
- -801111999, -293601215, 67108938, 67109002, 109051997, 109052061,
- 18, 17, 8388673, 12582977, 8388738, 12583042
-};
-
-/*
- * The following constants are used to determine the category of a
- * Unicode character.
- */
-
-#define UNICODE_CATEGORY_MASK 0X1F
-
-enum {
- UNASSIGNED,
- UPPERCASE_LETTER,
- LOWERCASE_LETTER,
- TITLECASE_LETTER,
- MODIFIER_LETTER,
- OTHER_LETTER,
- NON_SPACING_MARK,
- ENCLOSING_MARK,
- COMBINING_SPACING_MARK,
- DECIMAL_DIGIT_NUMBER,
- LETTER_NUMBER,
- OTHER_NUMBER,
- SPACE_SEPARATOR,
- LINE_SEPARATOR,
- PARAGRAPH_SEPARATOR,
- CONTROL,
- FORMAT,
- PRIVATE_USE,
- SURROGATE,
- CONNECTOR_PUNCTUATION,
- DASH_PUNCTUATION,
- OPEN_PUNCTUATION,
- CLOSE_PUNCTUATION,
- INITIAL_QUOTE_PUNCTUATION,
- FINAL_QUOTE_PUNCTUATION,
- OTHER_PUNCTUATION,
- MATH_SYMBOL,
- CURRENCY_SYMBOL,
- MODIFIER_SYMBOL,
- OTHER_SYMBOL
-};
-
-/*
- * The following macros extract the fields of the character info. The
- * GetDelta() macro is complicated because we can't rely on the C compiler
- * to do sign extension on right shifts.
- */
-
-#define GetCaseType(info) (((info) & 0xE0) >> 5)
-#define GetCategory(info) ((info) & 0x1F)
-#define GetDelta(info) (((info) > 0) ? ((info) >> 22) : (~(~((info)) >> 22)))
-
-/*
- * This macro extracts the information about a character from the
- * Unicode character tables.
- */
-
-#define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]])
-
diff --git a/src/regex/utils.h b/src/regex/utils.h
new file mode 100644
index 0000000000..1a997ac8fc
--- /dev/null
+++ b/src/regex/utils.h
@@ -0,0 +1,22 @@
+/* utility definitions */
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
+#endif
+#define INFINITY (DUPMAX + 1)
+#define NC (CHAR_MAX - CHAR_MIN + 1)
+typedef unsigned char uch;
+
+/* switch off assertions (if not already off) if no REDEBUG */
+#ifndef REDEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions please */
+#endif
+#endif
+#include
+
+/* for old systems with bcopy() but no memmove() */
+#ifdef USEBCOPY
+#define memmove(d, s, c) bcopy(s, d, c)
+#endif
diff --git a/src/unix/net.cpp b/src/unix/net.cpp
deleted file mode 100644
index 8fff0bfdd1..0000000000
--- a/src/unix/net.cpp
+++ /dev/null
@@ -1,422 +0,0 @@
-// -*- c++ -*- ///////////////////////////////////////////////////////////////
-// Name: unix/net.cpp
-// Purpose: Network related wxWindows classes and functions
-// Author: Karsten Ballüder
-// Modified by:
-// Created: 03.10.99
-// RCS-ID: $Id$
-// Copyright: (c) Karsten Ballüder
-// Licence: wxWindows licence
-/////////////////////////////////////////////////////////////////////////////
-
-#include "wx/setup.h"
-
-#if wxUSE_DIALUP_MANAGER
-
-#ifndef WX_PRECOMP
-# include "wx/defs.h"
-#endif // !PCH
-
-#include "wx/string.h"
-#include "wx/event.h"
-#include "wx/net.h"
-#include "wx/timer.h"
-#include "wx/filefn.h"
-#include "wx/utils.h"
-#include "wx/log.h"
-#include "wx/file.h"
-
-#include
-#include
-#include
-#include
-#define __STRICT_ANSI__
-#include
-#include
-#include
-#include
-#include
-#include
-
-// ----------------------------------------------------------------------------
-// A class which groups functions dealing with connecting to the network from a
-// workstation using dial-up access to the net. There is at most one instance
-// of this class in the program accessed via GetDialUpManager().
-// ----------------------------------------------------------------------------
-
-/* TODO
- *
- * 1. more configurability for Unix: i.e. how to initiate the connection, how
- * to check for online status, &c.
- * 2. add a "long Dial(long connectionId = -1)" function which asks the user
- * about which connection to dial (this may be done using native dialogs
- * under NT, need generic dialogs for all others) and returns the identifier
- * of the selected connection (it's opaque to the application) - it may be
- * reused later to dial the same connection later (or use strings instead of
- * longs may be?)
- * 3. add an async version of dialing functions which notify the caller about
- * the progress (or may be even start another thread to monitor it)
- * 4. the static creation/accessor functions are not MT-safe - but is this
- * really crucial? I think we may suppose they're always called from the
- * main thread?
- */
-
-class WXDLLEXPORT wxDialUpManagerImpl : public wxDialUpManager
-{
-public:
- wxDialUpManagerImpl()
- {
- m_IsOnline = -1; // unknown
- m_timer = NULL;
- m_CanUseIfconfig = -1; // unknown
- m_BeaconHost = WXDIALUP_MANAGER_DEFAULT_BEACONHOST;
- m_BeaconPort = 80;
- }
-
- /** Could the dialup manager be initialized correctly? If this function
- returns FALSE, no other functions will work neither, so it's a good idea
- to call this function and check its result before calling any other
- wxDialUpManager methods.
- */
- virtual bool IsOk() const
- { return TRUE; }
-
- /** The simplest way to initiate a dial up: this function dials the given
- ISP (exact meaning of the parameter depends on the platform), returns
- TRUE on success or FALSE on failure and logs the appropriate error
- message in the latter case.
- @param nameOfISP optional paramater for dial program
- @param username unused
- @param password unused
- */
- virtual bool Dial(const wxString& nameOfISP,
- const wxString& WXUNUSED(username),
- const wxString& WXUNUSED(password));
-
- /// Hang up the currently active dial up connection.
- virtual bool HangUp();
-
- // returns TRUE if the computer is connected to the network: under Windows,
- // this just means that a RAS connection exists, under Unix we check that
- // the "well-known host" (as specified by SetWellKnownHost) is reachable
- virtual bool IsOnline() const
- {
- if( (! m_timer) // we are not polling, so test now:
- || m_IsOnline == -1
- )
- CheckStatus();
- return m_IsOnline != 0;
- }
-
- // sometimes the built-in logic for determining the online status may fail,
- // so, in general, the user should be allowed to override it. This function
- // allows to forcefully set the online status - whatever our internal
- // algorithm may think about it.
- virtual void SetOnlineStatus(bool isOnline = TRUE)
- { m_IsOnline = isOnline; }
-
- // set misc wxDialUpManager options
- // --------------------------------
-
- // enable automatical checks for the connection status and sending of
- // wxEVT_DIALUP_CONNECTED/wxEVT_DIALUP_DISCONNECTED events. The interval
- // parameter is only for Unix where we do the check manually: under
- // Windows, the notification about the change of connection status is
- // instantenous.
- //
- // Returns FALSE if couldn't set up automatic check for online status.
- virtual bool EnableAutoCheckOnlineStatus(size_t nSeconds);
-
- // disable automatic check for connection status change - notice that the
- // wxEVT_DIALUP_XXX events won't be sent any more neither.
- virtual void DisableAutoCheckOnlineStatus();
-
- // under Unix, the value of well-known host is used to check whether we're
- // connected to the internet. It's unused under Windows, but this function
- // is always safe to call. The default value is www.yahoo.com.
- virtual void SetWellKnownHost(const wxString& hostname,
- int portno = 80);
- /** Sets the commands to start up the network and to hang up
- again. Used by the Unix implementations only.
- */
- virtual void SetConnectCommand(const wxString &command, const wxString &hupcmd)
- { m_ConnectCommand = command; m_HangUpCommand = hupcmd; }
-
-private:
- /// -1: don´t know, 0 = no, 1 = yes
- int m_IsOnline;
-
- /// Can we use ifconfig to list active devices?
- int m_CanUseIfconfig;
- /// The path to ifconfig
- wxString m_IfconfigPath;
-
- /// beacon host:
- wxString m_BeaconHost;
- /// beacon host portnumber for connect:
- int m_BeaconPort;
-
- /// command to connect to network
- wxString m_ConnectCommand;
- /// command to hang up
- wxString m_HangUpCommand;
- /// name of ISP
- wxString m_ISPname;
- /// a timer for regular testing
- class AutoCheckTimer *m_timer;
-
- friend class AutoCheckTimer;
- /// determine status
- void CheckStatus(void) const;
-
- /// real status check
- void CheckStatusInternal(void);
-};
-
-
-class AutoCheckTimer : public wxTimer
-{
-public:
- AutoCheckTimer(wxDialUpManagerImpl *dupman)
- {
- m_dupman = dupman;
- m_started = FALSE;
- }
-
- virtual bool Start( int millisecs = -1 )
- { m_started = TRUE; return wxTimer::Start(millisecs, FALSE); }
-
- virtual void Notify()
- { wxLogTrace("Checking dial up network status."); m_dupman->CheckStatus(); }
-
- virtual void Stop()
- { if ( m_started ) wxTimer::Stop(); }
-public:
- bool m_started;
- wxDialUpManagerImpl *m_dupman;
-};
-
-bool
-wxDialUpManagerImpl::Dial(const wxString &isp,
- const wxString & WXUNUSED(username),
- const wxString & WXUNUSED(password))
-{
- if(m_IsOnline == 1)
- return FALSE;
- m_IsOnline = -1;
- m_ISPname = isp;
- wxString cmd;
- if(m_ConnectCommand.Find("%s"))
- cmd.Printf(m_ConnectCommand,m_ISPname.c_str());
- else
- cmd = m_ConnectCommand;
- return wxExecute(cmd, /* sync */ TRUE) == 0;
-}
-
-bool
-wxDialUpManagerImpl::HangUp(void)
-{
- if(m_IsOnline == 0)
- return FALSE;
- m_IsOnline = -1;
- wxString cmd;
- if(m_HangUpCommand.Find("%s"))
- cmd.Printf(m_HangUpCommand,m_ISPname.c_str());
- else
- cmd = m_HangUpCommand;
- return wxExecute(cmd, /* sync */ TRUE) == 0;
-}
-
-
-bool
-wxDialUpManagerImpl::EnableAutoCheckOnlineStatus(size_t nSeconds)
-{
- wxASSERT(m_timer == NULL);
- m_timer = new AutoCheckTimer(this);
- bool rc = m_timer->Start(nSeconds*1000);
- if(! rc)
- {
- delete m_timer;
- m_timer = NULL;
- }
- return rc;
-}
-
-void
-wxDialUpManagerImpl::DisableAutoCheckOnlineStatus()
-{
- wxASSERT(m_timer != NULL);
- m_timer->Stop();
- delete m_timer;
- m_timer = NULL;
-}
-
-
-void
-wxDialUpManagerImpl::SetWellKnownHost(const wxString& hostname, int portno)
-{
- /// does hostname contain a port number?
- wxString port = hostname.After(':');
- if(port.Length())
- {
- m_BeaconHost = hostname.Before(':');
- m_BeaconPort = atoi(port);
- }
- else
- {
- m_BeaconHost = hostname;
- m_BeaconPort = portno;
- }
-}
-
-
-void
-wxDialUpManagerImpl::CheckStatus(void) const
-{
- // This function calls the CheckStatusInternal() helper function
- // which is OS - specific and then sends the events.
-
- int oldIsOnline = m_IsOnline;
- ( /* non-const */ (wxDialUpManagerImpl *)this)->CheckStatusInternal();
-
- // now send the events as appropriate:
- if(m_IsOnline != oldIsOnline)
- {
- if(m_IsOnline)
- ; // send ev
- else
- ; // send ev
- }
-}
-
-/*
- We have three methods that we can use:
-
- 1. test via /sbin/ifconfig and grep for "sl", "ppp", "pl"
- --> should be fast enough for regular polling
- 2. test if we can reach the well known beacon host
- --> too slow for polling
- 3. check /proc/net/dev on linux??
- This method should be preferred, if possible. Need to do more
- testing.
-
-*/
-
-void
-wxDialUpManagerImpl::CheckStatusInternal(void)
-{
- m_IsOnline = -1;
-
- // First time check for ifconfig location. We only use the variant
- // which does not take arguments, a la GNU.
- if(m_CanUseIfconfig == -1) // unknown
- {
- if(wxFileExists("/sbin/ifconfig"))
- m_IfconfigPath = "/sbin/ifconfig";
- else if(wxFileExists("/usr/sbin/ifconfig"))
- m_IfconfigPath = "/usr/sbin/ifconfig";
- }
-
- wxLogNull ln; // suppress all error messages
- // Let´s try the ifconfig method first, should be fastest:
- if(m_CanUseIfconfig != 0) // unknown or yes
- {
- wxASSERT(m_IfconfigPath.length());
-
- wxString tmpfile = wxGetTempFileName("_wxdialuptest");
- wxString cmd = "/bin/sh -c \'";
- cmd << m_IfconfigPath << " >" << tmpfile << '\'';
- /* I tried to add an option to wxExecute() to not close stdout,
- so we could let ifconfig write directly to the tmpfile, but
- this does not work. That should be faster, as it doesn´t call
- the shell first. I have no idea why. :-( (KB) */
-#if 0
- // temporarily redirect stdout/stderr:
- int
- new_stdout = dup(STDOUT_FILENO),
- new_stderr = dup(STDERR_FILENO);
- close(STDOUT_FILENO);
- close(STDERR_FILENO);
-
- int
- // new stdout:
- output_fd = open(tmpfile, O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR),
- // new stderr:
- null_fd = open("/dev/null", O_CREAT, S_IRUSR|S_IWUSR);
- // verify well behaved unix behaviour:
- wxASSERT(output_fd == STDOUT_FILENO);
- wxASSERT(null_fd == STDERR_FILENO);
- int rc = wxExecute(m_IfconfigPath,TRUE /* sync */,NULL ,wxEXECUTE_DONT_CLOSE_FDS);
- close(null_fd); close(output_fd);
- // restore old stdout, stderr:
- int test;
- test = dup(new_stdout); close(new_stdout); wxASSERT(test == STDOUT_FILENO);
- test = dup(new_stderr); close(new_stderr); wxASSERT(test == STDERR_FILENO);
- if(rc == 0)
-#endif
- if(wxExecute(cmd,TRUE /* sync */) == 0)
- {
- m_CanUseIfconfig = 1;
- wxFile file;
- if( file.Open(tmpfile) )
- {
- char *output = new char [file.Length()+1];
- output[file.Length()] = '\0';
- if(file.Read(output,file.Length()) == file.Length())
- {
- if(strstr(output,"ppp") // ppp
- || strstr(output,"sl") // slip
- || strstr(output,"pl") // plip
- )
- m_IsOnline = 1;
- else
- m_IsOnline = 0;
- }
- file.Close();
- delete [] output;
- }
- // else m_IsOnline remains -1 as we don't know for sure
- }
- else // could not run ifconfig correctly
- m_CanUseIfconfig = 0; // don´t try again
- (void) wxRemoveFile(tmpfile);
- if(m_IsOnline != -1) // we are done
- return;
- }
-
- // second method: try to connect to well known host:
- // This can be used under Win 9x, too!
- struct hostent *hp;
- struct sockaddr_in serv_addr;
- int sockfd;
-
- m_IsOnline = 0; // assume false
- if((hp = gethostbyname(m_BeaconHost)) == NULL)
- return; // no DNS no net
-
- serv_addr.sin_family = hp->h_addrtype;
- memcpy(&serv_addr.sin_addr,hp->h_addr, hp->h_length);
- serv_addr.sin_port = htons(m_BeaconPort);
- if( ( sockfd = socket(hp->h_addrtype, SOCK_STREAM, 0)) < 0)
- {
- // sys_error("cannot create socket for gw");
- return;
- }
- if( connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0)
- {
- //sys_error("cannot connect to server");
- return;
- }
- //connected!
- close(sockfd);
-}
-
-
-/* static */
-wxDialUpManager *
-wxDialUpManager::wxDialUpManager::Create(void)
-{
- return new wxDialUpManagerImpl;
-}
-
-#endif // wxUSE_DIALUP_MANAGER
diff --git a/utils/wxprop/src/prop.cpp b/utils/wxprop/src/prop.cpp
deleted file mode 100644
index 59802962de..0000000000
--- a/utils/wxprop/src/prop.cpp
+++ /dev/null
@@ -1,1119 +0,0 @@
-/////////////////////////////////////////////////////////////////////////////
-// Name: prop.cpp
-// Purpose: Propert sheet classes implementation
-// Author: Julian Smart
-// Modified by:
-// Created: 04/01/98
-// RCS-ID: $Id$
-// Copyright: (c) Julian Smart
-// Licence: wxWindows license
-/////////////////////////////////////////////////////////////////////////////
-
-#ifdef __GNUG__
-#pragma implementation "prop.h"
-#endif
-
-// For compilers that support precompilation, includes "wx/wx.h".
-#include "wx/wxprec.h"
-
-#ifdef __BORLANDC__
-#pragma hdrstop
-#endif
-
-#ifndef WX_PRECOMP
-#include "wx/wx.h"
-#endif
-
-#include
-#include
-#include
-#include
-
-#if wxUSE_IOSTREAMH
-#if defined(__WXMSW__) && !defined(__GNUWIN32__)
-#include
-#else
-#include
-#endif
-#else
-#include
-#endif
-
-#include "wx/window.h"
-#include "wx/utils.h"
-#include "wx/list.h"
-#include "prop.h"
-
-IMPLEMENT_DYNAMIC_CLASS(wxPropertyValue, wxObject)
-
-wxPropertyValue::wxPropertyValue(void)
-{
- m_type = wxPropertyValueNull;
- m_next = NULL;
- m_last = NULL;
- m_value.first = NULL;
- m_clientData = NULL;
- m_modifiedFlag = FALSE;
-}
-
-wxPropertyValue::wxPropertyValue(const wxPropertyValue& copyFrom)
-{
- m_modifiedFlag = FALSE;
- Copy((wxPropertyValue& )copyFrom);
-}
-
-wxPropertyValue::wxPropertyValue(const char *val)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueString;
-
- m_value.string = copystring(val);
- m_clientData = NULL;
- m_next = NULL;
- m_last = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(const wxString& val)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueString;
-
- m_value.string = copystring((const char *)val);
- m_clientData = NULL;
- m_next = NULL;
- m_last = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(long the_integer)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueInteger;
- m_value.integer = the_integer;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(bool val)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValuebool;
- m_value.integer = val;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(float the_real)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueReal;
- m_value.real = the_real;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(double the_real)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueReal;
- m_value.real = (float)the_real;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-// Pointer versions: we have a pointer to the real C++ value.
-wxPropertyValue::wxPropertyValue(char **val)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueStringPtr;
-
- m_value.stringPtr = val;
- m_clientData = NULL;
- m_next = NULL;
- m_last = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(long *val)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueIntegerPtr;
- m_value.integerPtr = val;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(bool *val)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueboolPtr;
- m_value.boolPtr = val;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(float *val)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueRealPtr;
- m_value.realPtr = val;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-wxPropertyValue::wxPropertyValue(wxList *the_list)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueList;
- m_clientData = NULL;
- m_last = NULL;
- m_value.first = NULL;
-
- wxNode *node = the_list->First();
- while (node)
- {
- wxPropertyValue *expr = (wxPropertyValue *)node->Data();
- Append(expr);
- node = node->Next();
- }
-
- delete the_list;
-}
-
-wxPropertyValue::wxPropertyValue(wxStringList *the_list)
-{
- m_modifiedFlag = FALSE;
- m_type = wxPropertyValueList;
- m_clientData = NULL;
- m_last = NULL;
- m_value.first = NULL;
-
- wxNode *node = the_list->First();
- while (node)
- {
- char *s = (char *)node->Data();
- Append(new wxPropertyValue(s));
- node = node->Next();
- }
- delete the_list;
-}
-
-wxPropertyValue::~wxPropertyValue(void)
-{
- switch (m_type)
- {
- case wxPropertyValueInteger:
- case wxPropertyValuebool:
- case wxPropertyValueReal:
- {
- break;
- }
- case wxPropertyValueString:
- {
- delete[] m_value.string;
- break;
- }
- case wxPropertyValueList:
- {
- wxPropertyValue *expr = m_value.first;
- while (expr)
- {
- wxPropertyValue *expr1 = expr->m_next;
-
- delete expr;
- expr = expr1;
- }
- break;
- }
- default:
- case wxPropertyValueNull: break;
- }
-}
-
-void wxPropertyValue::Append(wxPropertyValue *expr)
-{
- m_modifiedFlag = TRUE;
- if (!m_value.first)
- m_value.first = expr;
-
- if (m_last)
- m_last->m_next = expr;
- m_last = expr;
-}
-
-void wxPropertyValue::Insert(wxPropertyValue *expr)
-{
- m_modifiedFlag = TRUE;
- expr->m_next = m_value.first;
- m_value.first = expr;
-
- if (!m_last)
- m_last = expr;
-}
-
-// Delete from list
-void wxPropertyValue::Delete(wxPropertyValue *node)
-{
- wxPropertyValue *expr = GetFirst();
-
- wxPropertyValue *previous = NULL;
- while (expr && (expr != node))
- {
- previous = expr;
- expr = expr->GetNext();
- }
-
- if (expr)
- {
- if (previous)
- previous->m_next = expr->m_next;
-
- // If node was the first in the list,
- // make the list point to the NEXT one.
- if (GetFirst() == expr)
- {
- m_value.first = expr->m_next;
- }
-
- // If node was the last in the list,
- // make the list 'last' pointer point to the PREVIOUS one.
- if (GetLast() == expr)
- {
- if (previous)
- m_last = previous;
- else
- m_last = NULL;
- }
- m_modifiedFlag = TRUE;
- delete expr;
- }
-
-}
-
-void wxPropertyValue::ClearList(void)
-{
- wxPropertyValue *val = GetFirst();
- if (val)
- m_modifiedFlag = TRUE;
-
- while (val)
- {
- wxPropertyValue *next = val->GetNext();
- delete val;
- val = next;
- }
- m_value.first = NULL;
- m_last = NULL;
-}
-
-wxPropertyValue *wxPropertyValue::NewCopy(void) const
-{
- switch (m_type)
- {
- case wxPropertyValueInteger:
- return new wxPropertyValue(m_value.integer);
- case wxPropertyValuebool:
- return new wxPropertyValue((bool) (m_value.integer != 0));
- case wxPropertyValueReal:
- return new wxPropertyValue(m_value.real);
- case wxPropertyValueString:
- return new wxPropertyValue(m_value.string);
- case wxPropertyValueList:
- {
- wxPropertyValue *expr = m_value.first;
- wxPropertyValue *new_list = new wxPropertyValue;
- new_list->SetType(wxPropertyValueList);
- while (expr)
- {
- wxPropertyValue *expr2 = expr->NewCopy();
- new_list->Append(expr2);
- expr = expr->m_next;
- }
- return new_list;
- }
- case wxPropertyValueIntegerPtr:
- return new wxPropertyValue(m_value.integerPtr);
- case wxPropertyValueRealPtr:
- return new wxPropertyValue(m_value.realPtr);
- case wxPropertyValueboolPtr:
- return new wxPropertyValue(m_value.boolPtr);
- case wxPropertyValueStringPtr:
- return new wxPropertyValue(m_value.stringPtr);
-
- case wxPropertyValueNull:
-#ifdef __X__
- cerr << "Should never get here!\n";
-#endif
- break;
- }
- return NULL;
-}
-
-void wxPropertyValue::Copy(wxPropertyValue& copyFrom)
-{
- m_type = copyFrom.Type();
-
- switch (m_type)
- {
- case wxPropertyValueInteger:
- (*this) = copyFrom.IntegerValue();
- return ;
-
- case wxPropertyValueReal:
- (*this) = copyFrom.RealValue();
- return ;
-
- case wxPropertyValueString:
- (*this) = wxString(copyFrom.StringValue());
- return ;
-
- case wxPropertyValuebool:
- (*this) = copyFrom.BoolValue();
- return ;
-
- // Pointers
- case wxPropertyValueboolPtr:
- (*this) = copyFrom.BoolValuePtr();
- return ;
- case wxPropertyValueRealPtr:
- (*this) = copyFrom.RealValuePtr();
- return ;
- case wxPropertyValueIntegerPtr:
- (*this) = copyFrom.IntegerValuePtr();
- return ;
- case wxPropertyValueStringPtr:
- {
- char** s = copyFrom.StringValuePtr();
- (*this) = s != 0;
- return ;
- }
-
- case wxPropertyValueList:
- {
- m_value.first = NULL;
- m_next = NULL;
- m_last = NULL;
- wxPropertyValue *expr = copyFrom.m_value.first;
- while (expr)
- {
- wxPropertyValue *expr2 = expr->NewCopy();
- Append(expr2);
- expr = expr->m_next;
- }
- return;
- }
- case wxPropertyValueNull:
-#ifdef __X__
- cerr << "Should never get here!\n";
-#endif
- break;
- }
-}
-
-// Return nth argument of a clause (starting from 1)
-wxPropertyValue *wxPropertyValue::Arg(wxPropertyValueType type, int arg) const
-{
- wxPropertyValue *expr = m_value.first;
- for (int i = 1; i < arg; i++)
- if (expr)
- expr = expr->m_next;
-
- if (expr && (expr->m_type == type))
- return expr;
- else
- return NULL;
-}
-
-// Return nth argument of a list expression (starting from zero)
-wxPropertyValue *wxPropertyValue::Nth(int arg) const
-{
- if (m_type != wxPropertyValueList)
- return NULL;
-
- wxPropertyValue *expr = m_value.first;
- for (int i = 0; i < arg; i++)
- if (expr)
- expr = expr->m_next;
- else return NULL;
-
- if (expr)
- return expr;
- else
- return NULL;
-}
-
- // Returns the number of elements in a list expression
-int wxPropertyValue::Number(void) const
-{
- if (m_type != wxPropertyValueList)
- return 0;
-
- int i = 0;
- wxPropertyValue *expr = m_value.first;
- while (expr)
- {
- expr = expr->m_next;
- i ++;
- }
- return i;
-}
-
-void wxPropertyValue::WritePropertyClause(ostream& stream) // Write this expression as a top-level clause
-{
- if (m_type != wxPropertyValueList)
- return;
-
- wxPropertyValue *node = m_value.first;
- if (node)
- {
- node->WritePropertyType(stream);
- stream << "(";
- node = node->m_next;
- bool first = TRUE;
- while (node)
- {
- if (!first)
- stream << " ";
- node->WritePropertyType(stream);
- node = node->m_next;
- if (node) stream << ",\n";
- first = FALSE;
- }
- stream << ").\n\n";
- }
-}
-
-void wxPropertyValue::WritePropertyType(ostream& stream) // Write as any other subexpression
-{
- switch (m_type)
- {
- case wxPropertyValueInteger:
- {
- stream << m_value.integer;
- break;
- }
- case wxPropertyValueIntegerPtr:
- {
- stream << *m_value.integerPtr;
- break;
- }
- case wxPropertyValuebool:
- {
- if (m_value.integer)
- stream << "True";
- else
- stream << "False";
- break;
- }
- case wxPropertyValueboolPtr:
- {
- if (*m_value.integerPtr)
- stream << "True";
- else
- stream << "False";
- break;
- }
- case wxPropertyValueReal:
- {
- float f = m_value.real;
- sprintf(wxBuffer, "%.6g", (double)f);
- stream << wxBuffer;
- break;
- }
- case wxPropertyValueRealPtr:
- {
- float f = *m_value.realPtr;
-/* Now the parser can cope with this.
- // Prevent printing in 'e' notation. Any better way?
- if (fabs(f) < 0.00001)
- f = 0.0;
-*/
- sprintf(wxBuffer, "%.6g", f);
- stream << wxBuffer;
- break;
- }
- case wxPropertyValueString:
- {
-// stream << "\"";
- int i;
- int len = strlen(m_value.string);
- for (i = 0; i < len; i++)
- {
- char ch = m_value.string[i];
-// if (ch == '"' || ch == '\\')
-// stream << "\\";
- stream << ch;
- }
-
-// stream << "\"";
- break;
- }
- case wxPropertyValueStringPtr:
- {
- int i;
- int len = strlen(*(m_value.stringPtr));
- for (i = 0; i < len; i++)
- {
- char ch = *(m_value.stringPtr)[i];
-
- }
- break;
- }
- case wxPropertyValueList:
- {
- if (!m_value.first)
- stream << "[]";
- else
- {
- wxPropertyValue *expr = m_value.first;
-
- stream << "[";
- while (expr)
- {
- expr->WritePropertyType(stream);
- expr = expr->m_next;
- if (expr) stream << ", ";
- }
- stream << "]";
- }
- break;
- }
- case wxPropertyValueNull: break;
- }
-}
-
-wxString wxPropertyValue::GetStringRepresentation(void)
-{
- char buf[500];
- buf[0] = 0;
-
- ostrstream str((char *)buf, (int)500, ios::out);
- WritePropertyType(str);
- str << '\0';
- str.flush();
-
- wxString theString(buf);
- return theString;
-}
-
-void wxPropertyValue::operator=(const wxPropertyValue& val)
-{
- m_modifiedFlag = TRUE;
- Copy((wxPropertyValue&)val);
-}
-
-// void wxPropertyValue::operator=(const char *val)
-void wxPropertyValue::operator=(const wxString& val1)
-{
- const char *val = (const char *)val1;
-
- m_modifiedFlag = TRUE;
- if (m_type == wxPropertyValueNull)
- m_type = wxPropertyValueString;
-
- if (m_type == wxPropertyValueString)
- {
- if (val)
- m_value.string = copystring(val);
- else
- m_value.string = NULL;
- }
- else if (m_type == wxPropertyValueStringPtr)
- {
- if (*m_value.stringPtr)
- delete[] *m_value.stringPtr;
- if (val)
- *m_value.stringPtr = copystring(val);
- else
- *m_value.stringPtr = NULL;
- }
-
- m_clientData = NULL;
- m_next = NULL;
- m_last = NULL;
-
-}
-
-void wxPropertyValue::operator=(const long val)
-{
- m_modifiedFlag = TRUE;
- if (m_type == wxPropertyValueNull)
- m_type = wxPropertyValueInteger;
-
- if (m_type == wxPropertyValueInteger)
- m_value.integer = val;
- else if (m_type == wxPropertyValueIntegerPtr)
- *m_value.integerPtr = val;
- else if (m_type == wxPropertyValueReal)
- m_value.real = (float)val;
- else if (m_type == wxPropertyValueRealPtr)
- *m_value.realPtr = (float)val;
-
- m_clientData = NULL;
- m_next = NULL;
-}
-
-void wxPropertyValue::operator=(const bool val)
-{
- m_modifiedFlag = TRUE;
- if (m_type == wxPropertyValueNull)
- m_type = wxPropertyValuebool;
-
- if (m_type == wxPropertyValuebool)
- m_value.integer = (long)val;
- else if (m_type == wxPropertyValueboolPtr)
- *m_value.boolPtr = val;
-
- m_clientData = NULL;
- m_next = NULL;
-}
-
-void wxPropertyValue::operator=(const float val)
-{
- m_modifiedFlag = TRUE;
- if (m_type == wxPropertyValueNull)
- m_type = wxPropertyValueReal;
-
- if (m_type == wxPropertyValueInteger)
- m_value.integer = (long)val;
- else if (m_type == wxPropertyValueIntegerPtr)
- *m_value.integerPtr = (long)val;
- else if (m_type == wxPropertyValueReal)
- m_value.real = val;
- else if (m_type == wxPropertyValueRealPtr)
- *m_value.realPtr = val;
-
- m_clientData = NULL;
- m_next = NULL;
-}
-
-void wxPropertyValue::operator=(const char **val)
-{
- m_modifiedFlag = TRUE;
- m_type = wxPropertyValueStringPtr;
-
- if (val)
- m_value.stringPtr = (char **)val;
- else
- m_value.stringPtr = NULL;
- m_clientData = NULL;
- m_next = NULL;
- m_last = NULL;
-
-}
-
-void wxPropertyValue::operator=(const long *val)
-{
- m_modifiedFlag = TRUE;
- m_type = wxPropertyValueIntegerPtr;
- m_value.integerPtr = (long *)val;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-void wxPropertyValue::operator=(const bool *val)
-{
- m_modifiedFlag = TRUE;
- m_type = wxPropertyValueboolPtr;
- m_value.boolPtr = (bool *)val;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-void wxPropertyValue::operator=(const float *val)
-{
- m_modifiedFlag = TRUE;
- m_type = wxPropertyValueRealPtr;
- m_value.realPtr = (float *)val;
- m_clientData = NULL;
- m_next = NULL;
-}
-
-long wxPropertyValue::IntegerValue(void) const
- {
- if (m_type == wxPropertyValueInteger)
- return m_value.integer;
- else if (m_type == wxPropertyValueReal)
- return (long)m_value.real;
- else if (m_type == wxPropertyValueIntegerPtr)
- return *m_value.integerPtr;
- else if (m_type == wxPropertyValueRealPtr)
- return (long)(*m_value.realPtr);
- else return 0;
- }
-
-long *wxPropertyValue::IntegerValuePtr(void) const
-{
- return m_value.integerPtr;
-}
-
-float wxPropertyValue::RealValue(void) const {
- if (m_type == wxPropertyValueReal)
- return m_value.real;
- else if (m_type == wxPropertyValueRealPtr)
- return *m_value.realPtr;
- else if (m_type == wxPropertyValueInteger)
- return (float)m_value.integer;
- else if (m_type == wxPropertyValueIntegerPtr)
- return (float)*(m_value.integerPtr);
- else return 0.0;
- }
-
-float *wxPropertyValue::RealValuePtr(void) const
-{
- return m_value.realPtr;
-}
-
-bool wxPropertyValue::BoolValue(void) const {
- if (m_type == wxPropertyValueReal)
- return (m_value.real != 0.0);
- if (m_type == wxPropertyValueRealPtr)
- return (*(m_value.realPtr) != 0.0);
- else if (m_type == wxPropertyValueInteger)
- return (m_value.integer != 0);
- else if (m_type == wxPropertyValueIntegerPtr)
- return (*(m_value.integerPtr) != 0);
- else if (m_type == wxPropertyValuebool)
- return (m_value.integer != 0);
- else if (m_type == wxPropertyValueboolPtr)
- return (*(m_value.boolPtr) != 0);
- else return FALSE;
- }
-
-bool *wxPropertyValue::BoolValuePtr(void) const
-{
- return m_value.boolPtr;
-}
-
-char *wxPropertyValue::StringValue(void) const {
- if (m_type == wxPropertyValueString)
- return m_value.string;
- else if (m_type == wxPropertyValueStringPtr)
- return *(m_value.stringPtr);
- else return NULL;
- }
-
-char **wxPropertyValue::StringValuePtr(void) const
-{
- return m_value.stringPtr;
-}
-
-/*
- * A property (name plus value)
- */
-
-IMPLEMENT_DYNAMIC_CLASS(wxProperty, wxObject)
-
-wxProperty::wxProperty(void)
-{
- m_propertyRole = (char *)NULL;
- m_propertyValidator = NULL;
- m_propertyWindow = NULL;
- m_enabled = TRUE;
-}
-
-wxProperty::wxProperty(wxProperty& copyFrom)
-{
- m_value = copyFrom.GetValue();
- m_name = copyFrom.GetName();
- m_propertyRole = copyFrom.GetRole();
- m_propertyValidator = copyFrom.GetValidator();
- m_enabled = copyFrom.IsEnabled();
- m_propertyWindow = NULL;
-}
-
-wxProperty::wxProperty(wxString nm, wxString role, wxPropertyValidator *ed):m_name(nm), m_propertyRole(role)
-{
- m_propertyValidator = ed;
- m_propertyWindow = NULL;
- m_enabled = TRUE;
-}
-
-wxProperty::wxProperty(wxString nm, const wxPropertyValue& val, wxString role, wxPropertyValidator *ed):
- m_name(nm), m_value(val), m_propertyRole(role)
-{
- m_propertyValidator = ed;
- m_propertyWindow = NULL;
- m_enabled = TRUE;
-}
-
-wxProperty::~wxProperty(void)
-{
- if (m_propertyValidator)
- delete m_propertyValidator;
-}
-
-wxPropertyValue& wxProperty::GetValue(void) const
-{
- return (wxPropertyValue&) m_value;
-}
-
-wxPropertyValidator *wxProperty::GetValidator(void) const
-{
- return m_propertyValidator;
-}
-
-wxString& wxProperty::GetName(void) const
-{
- return (wxString&) m_name;
-}
-
-wxString& wxProperty::GetRole(void) const
-{
- return (wxString&) m_propertyRole;
-}
-
-void wxProperty::SetValue(const wxPropertyValue& val)
-{
- m_value = val;
-}
-
-void wxProperty::SetValidator(wxPropertyValidator *ed)
-{
- m_propertyValidator = ed;
-}
-
-void wxProperty::SetRole(wxString& role)
-{
- m_propertyRole = role;
-}
-
-void wxProperty::SetName(wxString& nm)
-{
- m_name = nm;
-}
-
-void wxProperty::operator=(const wxPropertyValue& val)
-{
- m_value = val;
-}
-
-/*
- * Base property view class
- */
-
-IMPLEMENT_DYNAMIC_CLASS(wxPropertyView, wxEvtHandler)
-
-wxPropertyView::wxPropertyView(long flags)
-{
- m_buttonFlags = flags;
- m_propertySheet = NULL;
- m_currentValidator = NULL;
- m_currentProperty = NULL;
-}
-
-wxPropertyView::~wxPropertyView(void)
-{
-}
-
-void wxPropertyView::AddRegistry(wxPropertyValidatorRegistry *registry)
-{
- m_validatorRegistryList.Append(registry);
-}
-
-wxPropertyValidator *wxPropertyView::FindPropertyValidator(wxProperty *property)
-{
- if (property->GetValidator())
- return property->GetValidator();
-
- wxNode *node = m_validatorRegistryList.First();
- while (node)
- {
- wxPropertyValidatorRegistry *registry = (wxPropertyValidatorRegistry *)node->Data();
- wxPropertyValidator *validator = registry->GetValidator(property->GetRole());
- if (validator)
- return validator;
- node = node->Next();
- }
- return NULL;
-/*
- if (!wxDefaultPropertyValidator)
- wxDefaultPropertyValidator = new wxPropertyListValidator;
- return wxDefaultPropertyValidator;
-*/
-}
-
-/*
- * Property sheet
- */
-
-IMPLEMENT_DYNAMIC_CLASS(wxPropertySheet, wxObject)
-
-wxPropertySheet::wxPropertySheet(void):m_properties(wxKEY_STRING)
-{
-}
-
-wxPropertySheet::~wxPropertySheet(void)
-{
- Clear();
-}
-
-bool wxPropertySheet::Save( ostream& WXUNUSED(str) )
-{
- return FALSE;
-}
-
-bool wxPropertySheet::Load( ostream& WXUNUSED(str) )
-{
- return FALSE;
-}
-
-void wxPropertySheet::UpdateAllViews( wxPropertyView *WXUNUSED(thisView) )
-{
-}
-
-// Add a property
-void wxPropertySheet::AddProperty(wxProperty *property)
-{
- m_properties.Append((const char*) property->GetName(), property);
-}
-
-// Get property by name
-wxProperty *wxPropertySheet::GetProperty(wxString name)
-{
- wxNode *node = m_properties.Find((const char*) name);
- if (!node)
- return NULL;
- else
- return (wxProperty *)node->Data();
-}
-
-// Clear all properties
-void wxPropertySheet::Clear(void)
-{
- wxNode *node = m_properties.First();
- while (node)
- {
- wxProperty *prop = (wxProperty *)node->Data();
- wxNode *next = node->Next();
- delete prop;
- delete node;
- node = next;
- }
-}
-
-// Sets/clears the modified flag for each property value
-void wxPropertySheet::SetAllModified(bool flag)
-{
- wxNode *node = m_properties.First();
- while (node)
- {
- wxProperty *prop = (wxProperty *)node->Data();
- prop->GetValue().SetModified(flag);
- node = node->Next();
- }
-}
-
-/*
- * Property validator registry
- *
- */
-
-IMPLEMENT_DYNAMIC_CLASS(wxPropertyValidatorRegistry, wxHashTable)
-
-wxPropertyValidatorRegistry::wxPropertyValidatorRegistry(void):wxHashTable(wxKEY_STRING)
-{
-}
-
-wxPropertyValidatorRegistry::~wxPropertyValidatorRegistry(void)
-{
- ClearRegistry();
-}
-
-void wxPropertyValidatorRegistry::RegisterValidator(const wxString& typeName, wxPropertyValidator *validator)
-{
- Put((const char*) typeName, validator);
-}
-
-wxPropertyValidator *wxPropertyValidatorRegistry::GetValidator(const wxString& typeName)
-{
- return (wxPropertyValidator *)Get((const char*) typeName);
-}
-
-void wxPropertyValidatorRegistry::ClearRegistry(void)
-{
- BeginFind();
- wxNode *node;
- while (node = Next())
- {
- delete (wxPropertyValidator *)node->Data();
- }
-}
-
- /*
- * Property validator
- */
-
-
-IMPLEMENT_ABSTRACT_CLASS(wxPropertyValidator, wxEvtHandler)
-
-wxPropertyValidator::wxPropertyValidator(long flags)
-{
- m_validatorFlags = flags;
- m_validatorProperty = NULL;
-}
-
-wxPropertyValidator::~wxPropertyValidator(void)
-{}
-
-bool wxPropertyValidator::StringToFloat (char *s, float *number) {
- double num;
- bool ok = StringToDouble (s, &num);
- *number = (float) num;
- return ok;
-}
-
-bool wxPropertyValidator::StringToDouble (char *s, double *number) {
- bool ok = TRUE;
- char *value_ptr;
- *number = strtod (s, &value_ptr);
- if (value_ptr) {
- int len = strlen (value_ptr);
- for (int i = 0; i < len; i++) {
- ok = (isspace (value_ptr[i]) != 0);
- if (!ok) return FALSE;
- }
- }
- return ok;
-}
-
-bool wxPropertyValidator::StringToInt (char *s, int *number) {
- long num;
- bool ok = StringToLong (s, &num);
- *number = (int) num;
- return ok;
-}
-
-bool wxPropertyValidator::StringToLong (char *s, long *number) {
- bool ok = TRUE;
- char *value_ptr;
- *number = strtol (s, &value_ptr, 10);
- if (value_ptr) {
- int len = strlen (value_ptr);
- for (int i = 0; i < len; i++) {
- ok = (isspace (value_ptr[i]) != 0);
- if (!ok) return FALSE;
- }
- }
- return ok;
-}
-
-char *wxPropertyValidator::FloatToString (float number) {
- static char buf[20];
- sprintf (buf, "%.6g", number);
- return buf;
-}
-
-char *wxPropertyValidator::DoubleToString (double number) {
- static char buf[20];
- sprintf (buf, "%.6g", number);
- return buf;
-}
-
-char *wxPropertyValidator::IntToString (int number) {
- return ::IntToString (number);
-}
-
-char *wxPropertyValidator::LongToString (long number) {
- return ::LongToString (number);
- }
-
-