diff --git a/demos/dbbrowse/makefile.gtk b/demos/dbbrowse/makefile.gtk deleted file mode 100644 index cd8a960812..0000000000 --- a/demos/dbbrowse/makefile.gtk +++ /dev/null @@ -1,15 +0,0 @@ - -# Top dir of wxWindows -top_builddir = /gtm/bart/wxGTK - -PROGRAM=dbbrowser_gtk - - -OBJECTS= dbbrowse.o doc.o pgmctrl.o tabpgwin.o\ - browsedb.o dbtree.o dbgrid.o dlguser.o - - - - -include $(top_builddir)/src/makeprog.env - diff --git a/src/html/htmlfilter.cpp b/src/html/htmlfilter.cpp deleted file mode 100644 index 57508b1069..0000000000 --- a/src/html/htmlfilter.cpp +++ /dev/null @@ -1,172 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Name: filter.cpp -// Purpose: wxHtmlFilter - input filter for translating into HTML format -// Author: Vaclav Slavik -// Copyright: (c) 1999 Vaclav Slavik -// Licence: wxWindows Licence -///////////////////////////////////////////////////////////////////////////// - - -#ifdef __GNUG__ -#pragma implementation "htmlfilter.h" -#endif - -#include "wx/wxprec.h" - -#if wxUSE_HTML - -#ifdef __BORDLANDC__ -#pragma hdrstop -#endif - -#ifndef WXPRECOMP -#endif - -#include "wx/html/htmlfilter.h" -#include "wx/html/htmlwin.h" - - -/* - -There is code for several default filters: - -*/ - -IMPLEMENT_ABSTRACT_CLASS(wxHtmlFilter, wxObject) - -//-------------------------------------------------------------------------------- -// wxHtmlFilterPlainText -// filter for text/plain or uknown -//-------------------------------------------------------------------------------- - -IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterPlainText, wxHtmlFilter) - -bool wxHtmlFilterPlainText::CanRead(const wxFSFile& WXUNUSED(file)) const -{ - return TRUE; -} - - - -wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const -{ - wxInputStream *s = file.GetStream(); - char *src; - wxString doc, doc2; - - if (s == NULL) return wxEmptyString; - src = new char[s -> GetSize()+1]; - src[s -> GetSize()] = 0; - s -> Read(src, s -> GetSize()); - 
doc = src; - delete [] src; - - doc.Replace(_T("<"), _T("<"), TRUE); - doc.Replace(_T(">"), _T(">"), TRUE); - doc2 = _T("
\n") + doc + _T("\n
"); - return doc2; -} - - - - - -//-------------------------------------------------------------------------------- -// wxHtmlFilterImage -// filter for image/* -//-------------------------------------------------------------------------------- - -class wxHtmlFilterImage : public wxHtmlFilter -{ - DECLARE_DYNAMIC_CLASS(wxHtmlFilterImage) - - public: - virtual bool CanRead(const wxFSFile& file) const; - virtual wxString ReadFile(const wxFSFile& file) const; -}; - -IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterImage, wxHtmlFilter) - - - -bool wxHtmlFilterImage::CanRead(const wxFSFile& file) const -{ - return (file.GetMimeType().Left(6) == "image/"); -} - - - -wxString wxHtmlFilterImage::ReadFile(const wxFSFile& file) const -{ - return (""); -} - - - - -//-------------------------------------------------------------------------------- -// wxHtmlFilterPlainText -// filter for text/plain or uknown -//-------------------------------------------------------------------------------- - -class wxHtmlFilterHTML : public wxHtmlFilter -{ - DECLARE_DYNAMIC_CLASS(wxHtmlFilterHTML) - - public: - virtual bool CanRead(const wxFSFile& file) const; - virtual wxString ReadFile(const wxFSFile& file) const; -}; - - -IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterHTML, wxHtmlFilter) - -bool wxHtmlFilterHTML::CanRead(const wxFSFile& file) const -{ -// return (file.GetMimeType() == "text/html"); -// This is true in most case but some page can return: -// "text/html; char-encoding=...." 
-// So we use Find instead - return (file.GetMimeType().Find(_T("text/html")) == 0); -} - - - -wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const -{ - wxInputStream *s = file.GetStream(); - char *src; - wxString doc; - - if (s == NULL) return wxEmptyString; - src = new char[s -> GetSize() + 1]; - src[s -> GetSize()] = 0; - s -> Read(src, s -> GetSize()); - doc = src; - delete[] src; - - return doc; -} - - - - -///// Module: - -class wxHtmlFilterModule : public wxModule -{ - DECLARE_DYNAMIC_CLASS(wxHtmlFilterModule) - - public: - virtual bool OnInit() - { - wxHtmlWindow::AddFilter(new wxHtmlFilterHTML); - wxHtmlWindow::AddFilter(new wxHtmlFilterImage); - return TRUE; - } - virtual void OnExit() {} -}; - -IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterModule, wxModule) - -#endif diff --git a/src/html/htmlhelp.cpp b/src/html/htmlhelp.cpp deleted file mode 100644 index 8ea6466640..0000000000 --- a/src/html/htmlhelp.cpp +++ /dev/null @@ -1,839 +0,0 @@ -// Name: htmlhelp.cpp -// Purpose: Help controller -// Author: Vaclav Slavik -// Copyright: (c) 1999 Vaclav Slavik -// Licence: wxWindows Licence -///////////////////////////////////////////////////////////////////////////// - -#error This file should not be compiled! Update your build system! \ -(configure users, rerun configure to get a new Makefile) \ -Instead of htmlhelp[_io], use helpdata, helpfrm and helpctrl. This \ -file is only left to point out the problem and will be removed r.s.n. 
- -#ifdef __GNUG__ -#pragma implementation "htmlhelp.h" -#endif - -#include "wx/wxprec.h" - -#if wxUSE_HTML - -#ifdef __BORDLANDC__ -#pragma hdrstop -#endif - -#ifndef WXPRECOMP -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#if !((wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7))) -#include -#endif - - -// Bitmaps: - -#ifndef __WXMSW__ - // XPM hack: make the arrays const - #define static static const - - #include "bitmaps/panel.xpm" - #include "bitmaps/back.xpm" - #include "bitmaps/forward.xpm" - #include "bitmaps/book.xpm" - #include "bitmaps/folder.xpm" - #include "bitmaps/page.xpm" - - #undef static -#endif - -#include "search.h" - - - - -#include -WX_DEFINE_OBJARRAY(HtmlBookRecArray) - - - - - - - - - -//----------------------------------------------------------------------------- -// wxHtmlHelpController -//----------------------------------------------------------------------------- - - -IMPLEMENT_DYNAMIC_CLASS(wxHtmlHelpController, wxEvtHandler) - - -wxHtmlHelpController::wxHtmlHelpController() : wxEvtHandler() -{ - m_Frame = NULL; - m_Config = NULL; - m_ConfigRoot = wxEmptyString; - m_TitleFormat = _("Help : %s"); - m_TempPath = wxEmptyString; - - m_Cfg.x = m_Cfg.y = 0; - m_Cfg.w = 700; m_Cfg.h = 480; - m_Cfg.sashpos = 240; - m_Cfg.navig_on = TRUE; - - m_ContentsImageList = new wxImageList(12, 12); - m_ContentsImageList -> Add(wxICON(book)); - m_ContentsImageList -> Add(wxICON(folder)); - m_ContentsImageList -> Add(wxICON(page)); - - m_Contents = NULL; - m_ContentsCnt = 0; - m_Index = NULL; - m_IndexCnt = 0; - - m_IndexBox = NULL; - m_ContentsBox = NULL; - m_SearchList = NULL; - m_SearchText = NULL; - m_SearchButton = NULL; - m_HtmlWin = NULL; - m_Splitter = NULL; - m_NavigPan = NULL; -} - - - -wxHtmlHelpController::~wxHtmlHelpController() -{ - int i; - - m_BookRecords.Empty(); - delete m_ContentsImageList; - if (m_Contents) { - for (i = 0; i < m_ContentsCnt; i++) { - delete[] 
m_Contents[i].m_Page; - delete[] m_Contents[i].m_Name; - } - free(m_Contents); - } - if (m_Index) { - for (i = 0; i < m_IndexCnt; i++) { - delete[] m_Index[i].m_Page; - delete[] m_Index[i].m_Name; - } - free(m_Index); - } -} - - - -void wxHtmlHelpController::SetTempDir(const wxString& path) -{ - if (path == wxEmptyString) m_TempPath = path; - else { - if (wxIsAbsolutePath(path)) m_TempPath = path; - else m_TempPath = wxGetCwd() + "/" + path; - - if (m_TempPath[m_TempPath.Length() - 1] != '/') - m_TempPath << "/"; - } -} - - - - -// Reads one line, stores it into buf and returns pointer to new line or NULL. -static char* ReadLine(char *line, char *buf) -{ - char *writeptr = buf, *readptr = line; - - while (*readptr != 0 && *readptr != '\r' && *readptr != '\n') *(writeptr++) = *(readptr++); - *writeptr = 0; - while (*readptr == '\r' || *readptr == '\n') readptr++; - if (*readptr == 0) return NULL; - else return readptr; -} - - -static wxString SafeFileName(const wxString& s) -{ - wxString res = s; - res.Replace(_T(":"), _T("_"), TRUE); - res.Replace(_T(" "), _T("_"), TRUE); - res.Replace(_T("/"), _T("_"), TRUE); - res.Replace(_T("\\"), _T("_"), TRUE); - res.Replace(_T("#"), _T("_"), TRUE); - res.Replace(_T("."), _T("_"), TRUE); - return res; -} - - -static int IndexCompareFunc(const void *a, const void *b) -{ - return strcmp(((HtmlContentsItem*)a) -> m_Name, ((HtmlContentsItem*)b) -> m_Name); -} - - - -bool wxHtmlHelpController::AddBook(const wxString& book, bool show_wait_msg) -{ - wxFSFile *fi; - wxFileSystem fsys; - wxInputStream *s; - HtmlBookRecord *bookr; - wxString bookFull; - - int sz; - char *buff, *lineptr; - char linebuf[300]; - - wxString title = _("noname"), - safetitle, - start = wxEmptyString, - contents = wxEmptyString, index = wxEmptyString; - - if (wxIsAbsolutePath(book)) bookFull = book; - else bookFull = wxGetCwd() + "/" + book; - - fi = fsys.OpenFile(bookFull); - if (fi == NULL) return FALSE; - fsys.ChangePathTo(bookFull); - s = fi -> 
GetStream(); - sz = s -> GetSize(); - buff = new char[sz+1]; - buff[sz] = 0; - s -> Read(buff, sz); - lineptr = buff; - delete fi; - - while ((lineptr = ReadLine(lineptr, linebuf)) != NULL) { - if (strstr(linebuf, "Title=") == linebuf) - title = linebuf + strlen("Title="); - if (strstr(linebuf, "Default topic=") == linebuf) - start = linebuf + strlen("Default topic="); - if (strstr(linebuf, "Index file=") == linebuf) - index = linebuf + strlen("Index file="); - if (strstr(linebuf, "Contents file=") == linebuf) - contents = linebuf + strlen("Contents file="); - } - delete[] buff; - - bookr = new HtmlBookRecord(fsys.GetPath(), title, start); - - if (m_ContentsCnt % HTML_REALLOC_STEP == 0) - m_Contents = (HtmlContentsItem*) realloc(m_Contents, (m_ContentsCnt + HTML_REALLOC_STEP) * sizeof(HtmlContentsItem)); - m_Contents[m_ContentsCnt].m_Level = 0; - m_Contents[m_ContentsCnt].m_ID = 0; - m_Contents[m_ContentsCnt].m_Page = new char[start.Length() + 1]; - strcpy(m_Contents[m_ContentsCnt].m_Page, start.c_str()); - m_Contents[m_ContentsCnt].m_Name = new char [title.Length() + 1]; - strcpy(m_Contents[m_ContentsCnt].m_Name, title.c_str()); - m_Contents[m_ContentsCnt].m_Book = bookr; - m_ContentsCnt++; - - // Try to find cached binary versions: - safetitle = SafeFileName(title); - fi = fsys.OpenFile(safetitle + ".cached"); - if (fi == NULL) fi = fsys.OpenFile(m_TempPath + safetitle + ".cached"); - if ((fi == NULL) || (m_TempPath == wxEmptyString)) { - LoadMSProject(bookr, fsys, index, contents, show_wait_msg); - if (m_TempPath != wxEmptyString) { - wxFileOutputStream *outs = new wxFileOutputStream(m_TempPath + safetitle + ".cached"); - SaveCachedBook(bookr, outs); - delete outs; - } - } - else { - LoadCachedBook(bookr, fi -> GetStream()); - delete fi; - } - - m_BookRecords.Add(bookr); - if (m_IndexCnt > 0) - qsort(m_Index, m_IndexCnt, sizeof(HtmlContentsItem), IndexCompareFunc); - - return TRUE; -} - - - - -void wxHtmlHelpController::Display(const wxString& x) -{ - int cnt; - 
int i; - wxFileSystem fsys; - wxFSFile *f; - - CreateHelpWindow(); - - /* 1. try to open given file: */ - - cnt = m_BookRecords.GetCount(); - for (i = 0; i < cnt; i++) { - f = fsys.OpenFile(m_BookRecords[i].GetBasePath() + x); - if (f) { - m_HtmlWin -> LoadPage(m_BookRecords[i].GetBasePath() + x); - delete f; - return; - } - } - - - /* 2. try to find a book: */ - - for (i = 0; i < cnt; i++) { - if (m_BookRecords[i].GetTitle() == x) { - m_HtmlWin -> LoadPage(m_BookRecords[i].GetBasePath() + m_BookRecords[i].GetStart()); - return; - } - } - - /* 3. try to find in contents: */ - - cnt = m_ContentsCnt; - for (i = 0; i < cnt; i++) { - if (strcmp(m_Contents[i].m_Name, x) == 0) { - m_HtmlWin -> LoadPage(m_Contents[i].m_Book -> GetBasePath() + m_Contents[i].m_Page); - return; - } - } - - - /* 4. try to find in index: */ - - cnt = m_IndexCnt; - for (i = 0; i < cnt; i++) { - if (strcmp(m_Index[i].m_Name, x) == 0) { - m_HtmlWin -> LoadPage(m_Index[i].m_Book -> GetBasePath() + m_Index[i].m_Page); - return; - } - } - - - /* 5. 
if everything failed, search the documents: */ - - KeywordSearch(x); -} - - - -void wxHtmlHelpController::Display(const int id) -{ - CreateHelpWindow(); - - for (int i = 0; i < m_ContentsCnt; i++) { - if (m_Contents[i].m_ID == id) { - m_HtmlWin -> LoadPage(m_Contents[i].m_Book -> GetBasePath() + m_Contents[i].m_Page); - return; - } - } -} - - - -void wxHtmlHelpController::DisplayContents() -{ - CreateHelpWindow(); - m_Frame -> Raise(); - if (!m_Splitter -> IsSplit()) { - m_NavigPan -> Show(TRUE); - m_HtmlWin -> Show(TRUE); - m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos); - } - m_NavigPan -> SetSelection(0); -} - - - -void wxHtmlHelpController::DisplayIndex() -{ - CreateHelpWindow(); - m_Frame -> Raise(); - if (!m_Splitter -> IsSplit()) { - m_NavigPan -> Show(TRUE); - m_HtmlWin -> Show(TRUE); - m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos); - } - m_NavigPan -> SetSelection(1); -} - - - - -#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7)) - -class MyProgressDlg : public wxDialog -{ - public: - bool m_Canceled; - - MyProgressDlg(wxWindow *parent) : wxDialog(parent, -1, - _("Searching..."), - wxPoint(0, 0), -#ifdef __WXGTK__ - wxSize(300, 110) -#else - wxSize(300, 130) -#endif - ) - {m_Canceled = FALSE;} - void OnCancel(wxCommandEvent& event) {m_Canceled = TRUE;} - DECLARE_EVENT_TABLE() -}; -BEGIN_EVENT_TABLE(MyProgressDlg, wxDialog) - EVT_BUTTON(wxID_CANCEL, MyProgressDlg::OnCancel) -END_EVENT_TABLE() - -#endif - - -bool wxHtmlHelpController::KeywordSearch(const wxString& keyword) -{ - int foundcnt = 0; - CreateHelpWindow(); - // if these are not set, we can't continue - if (! 
(m_SearchList && m_HtmlWin)) - return FALSE; - m_Frame -> Raise(); - if (m_Splitter && m_NavigPan && m_SearchButton) { - if (!m_Splitter -> IsSplit()) { - m_NavigPan -> Show(TRUE); - m_HtmlWin -> Show(TRUE); - m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos); - } - m_NavigPan -> SetSelection(2); - m_SearchList -> Clear(); - m_SearchText -> SetValue(keyword); - m_SearchButton -> Enable(FALSE); - } - { - int cnt = m_ContentsCnt; - wxSearchEngine engine; - wxFileSystem fsys; - wxFSFile *file; - wxString lastpage = wxEmptyString; - wxString foundstr; - -#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7)) - MyProgressDlg progress(m_Frame); - - wxStaticText *prompt = new wxStaticText(&progress, -1, "", wxPoint(20, 50), wxSize(260, 25), wxALIGN_CENTER); - wxGauge *gauge = new wxGauge(&progress, -1, cnt, wxPoint(20, 20), wxSize(260, 25)); - wxButton *btn = new wxButton(&progress, wxID_CANCEL, _("Cancel"), wxPoint(110, 70), wxSize(80, 25)); - btn = btn; /* fool compiler :-) */ - prompt -> SetLabel(_("No matching page found yet")); - - progress.Centre(wxBOTH); - progress.Show(TRUE); -#else - wxProgressDialog progress(_("Searching..."), _("No matching page found yet"), cnt, m_Frame, wxPD_APP_MODAL | wxPD_CAN_ABORT | wxPD_AUTO_HIDE); -#endif - - engine.LookFor(keyword); - - for (int i = 0; i < cnt; i++) { -#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7)) - gauge -> SetValue(i); - if (progress.m_Canceled) break; -#else - if (progress.Update(i) == FALSE) break; -#endif - wxYield(); - - file = fsys.OpenFile(m_Contents[i].m_Book -> GetBasePath() + m_Contents[i].m_Page); - if (file) { - if (lastpage != file -> GetLocation()) { - lastpage = file -> GetLocation(); - if (engine.Scan(file -> GetStream())) { - foundstr.Printf(_("Found %i matches"), ++foundcnt); -#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7)) - prompt -> SetLabel(foundstr); -#else - 
progress.Update(i, foundstr); -#endif - wxYield(); - m_SearchList -> Append(m_Contents[i].m_Name, (char*)(m_Contents + i)); - } - } - delete file; - } - } - -#if (wxVERSION_NUMBER < 2100) || ((wxVERSION_NUMBER == 2100) && (wxBETA_NUMBER < 7)) - progress.Close(TRUE); -#endif - } - if (m_SearchButton) - m_SearchButton -> Enable(TRUE); - if (m_SearchText) { - m_SearchText -> SetSelection(0, keyword.Length()); - m_SearchText -> SetFocus(); - } - if (foundcnt) { - HtmlContentsItem *it = (HtmlContentsItem*) m_SearchList -> GetClientData(0); - if (it) m_HtmlWin -> LoadPage(it -> m_Book -> GetBasePath() + it -> m_Page); - } - return (foundcnt > 0); -} - - - - - - -void wxHtmlHelpController::CreateHelpWindow() -{ - wxBusyCursor cur; - wxString oldpath; - wxStatusBar *sbar; - - if (m_Frame) { - m_Frame -> Raise(); - m_Frame -> Show(TRUE); - return; - } - -#if wxUSE_BUSYINFO - wxBusyInfo busyinfo(_("Preparing help window...")); -#endif - - if (m_Config) ReadCustomization(m_Config, m_ConfigRoot); - - m_Frame = new wxFrame(NULL, -1, "", wxPoint(m_Cfg.x, m_Cfg.y), wxSize(m_Cfg.w, m_Cfg.h)); - m_Frame -> PushEventHandler(this); - sbar = m_Frame -> CreateStatusBar(); - - { - wxToolBar *toolBar; - toolBar = m_Frame -> CreateToolBar(wxNO_BORDER | wxTB_HORIZONTAL | wxTB_FLAT | wxTB_DOCKABLE); - toolBar -> SetMargins(2, 2); - wxBitmap* toolBarBitmaps[3]; - -#ifdef __WXMSW__ - toolBarBitmaps[0] = new wxBitmap("panel"); - toolBarBitmaps[1] = new wxBitmap("back"); - toolBarBitmaps[2] = new wxBitmap("forward"); - int width = 24; -#else - toolBarBitmaps[0] = new wxBitmap(panel_xpm); - toolBarBitmaps[1] = new wxBitmap(back_xpm); - toolBarBitmaps[2] = new wxBitmap(forward_xpm); - int width = 16; -#endif - - int currentX = 5; - - toolBar -> AddTool(wxID_HTML_PANEL, *(toolBarBitmaps[0]), wxNullBitmap, FALSE, currentX, -1, (wxObject *) NULL, _("Show/hide navigation panel")); - currentX += width + 5; - toolBar -> AddSeparator(); - toolBar -> AddTool(wxID_HTML_BACK, *(toolBarBitmaps[1]), 
wxNullBitmap, FALSE, currentX, -1, (wxObject *) NULL, _("Go back to the previous HTML page")); - currentX += width + 5; - toolBar -> AddTool(wxID_HTML_FORWARD, *(toolBarBitmaps[2]), wxNullBitmap, FALSE, currentX, -1, (wxObject *) NULL, _("Go forward to the next HTML page")); - currentX += width + 5; - - toolBar -> Realize(); - - // Can delete the bitmaps since they're reference counted - for (int i = 0; i < 3; i++) delete toolBarBitmaps[i]; - } - - - { - m_Splitter = new wxSplitterWindow(m_Frame); - - m_HtmlWin = new wxHtmlWindow(m_Splitter); - m_HtmlWin -> SetRelatedFrame(m_Frame, m_TitleFormat); - m_HtmlWin -> SetRelatedStatusBar(0); - if (m_Config) m_HtmlWin -> ReadCustomization(m_Config, m_ConfigRoot); - - m_NavigPan = new wxNotebook(m_Splitter, wxID_HTML_NOTEBOOK, wxDefaultPosition, wxDefaultSize); - { - m_ContentsBox = new wxTreeCtrl(m_NavigPan, wxID_HTML_TREECTRL, wxDefaultPosition, wxDefaultSize, wxTR_HAS_BUTTONS | wxSUNKEN_BORDER); - m_ContentsBox -> SetImageList(m_ContentsImageList); - m_NavigPan -> AddPage(m_ContentsBox, _("Contents")); - } - - { - wxWindow *dummy = new wxPanel(m_NavigPan, wxID_HTML_INDEXPAGE); - wxLayoutConstraints *b1 = new wxLayoutConstraints; - b1 -> top.SameAs (dummy, wxTop, 0); - b1 -> left.SameAs (dummy, wxLeft, 0); - b1 -> width.PercentOf (dummy, wxWidth, 100); - b1 -> bottom.SameAs (dummy, wxBottom, 0); - m_IndexBox = new wxListBox(dummy, wxID_HTML_INDEXLIST, wxDefaultPosition, wxDefaultSize, 0); - m_IndexBox -> SetConstraints(b1); - dummy -> SetAutoLayout(TRUE); - m_NavigPan -> AddPage(dummy, _("Index")); - } - - { - wxWindow *dummy = new wxPanel(m_NavigPan, wxID_HTML_SEARCHPAGE); - - wxLayoutConstraints *b1 = new wxLayoutConstraints; - m_SearchText = new wxTextCtrl(dummy, wxID_HTML_SEARCHTEXT); - b1 -> top.SameAs (dummy, wxTop, 0); - b1 -> left.SameAs (dummy, wxLeft, 0); - b1 -> right.SameAs (dummy, wxRight, 0); - b1 -> height.AsIs(); - m_SearchText -> SetConstraints(b1); - - wxLayoutConstraints *b2 = new wxLayoutConstraints; 
- m_SearchButton = new wxButton(dummy, wxID_HTML_SEARCHBUTTON, _("Search!")); - b2 -> top.Below (m_SearchText, 10); - b2 -> right.SameAs (dummy, wxRight, 10); - b2 -> width.AsIs(); - b2 -> height.AsIs(); - m_SearchButton -> SetConstraints(b2); - - wxLayoutConstraints *b3 = new wxLayoutConstraints; - m_SearchList = new wxListBox(dummy, wxID_HTML_SEARCHLIST, wxDefaultPosition, wxDefaultSize, 0); - b3 -> top.Below (m_SearchButton, 10); - b3 -> left.SameAs (dummy, wxLeft, 0); - b3 -> right.SameAs (dummy, wxRight, 0); - b3 -> bottom.SameAs (dummy, wxBottom, 0); - m_SearchList -> SetConstraints(b3); - - dummy -> SetAutoLayout(TRUE); - dummy -> Layout(); - m_NavigPan -> AddPage(dummy, _("Search")); - } - - RefreshLists(); - m_NavigPan -> Show(TRUE); - m_HtmlWin -> Show(TRUE); - m_Splitter -> SetMinimumPaneSize(20); - m_Splitter -> SplitVertically(m_NavigPan, m_HtmlWin, m_Cfg.sashpos); - if (!m_Cfg.navig_on) m_Splitter -> Unsplit(m_NavigPan); - wxYield(); - } - - m_Frame -> Show(TRUE); - wxYield(); -} - - - -#define MAX_ROOTS 64 - -void wxHtmlHelpController::CreateContents() -{ - HtmlContentsItem *it; - wxTreeItemId roots[MAX_ROOTS]; - bool imaged[MAX_ROOTS]; - int count = m_ContentsCnt; - - m_ContentsBox -> DeleteAllItems(); - roots[0] = m_ContentsBox -> AddRoot(_("(Help)")); - imaged[0] = TRUE; - - for (int i = 0; i < count; i++) { - it = m_Contents + i; - roots[it -> m_Level + 1] = m_ContentsBox -> AppendItem(roots[it -> m_Level], it -> m_Name, IMG_Page, -1, new wxHtmlHelpTreeItemData(it)); - if (it -> m_Level == 0) { - m_ContentsBox -> SetItemBold(roots[1], TRUE); - m_ContentsBox -> SetItemImage(roots[1], IMG_Book); - m_ContentsBox -> SetItemSelectedImage(roots[1], IMG_Book); - imaged[1] = TRUE; - } - else imaged[it -> m_Level + 1] = FALSE; - - if (!imaged[it -> m_Level]) { - m_ContentsBox -> SetItemImage(roots[it -> m_Level], IMG_Folder); - m_ContentsBox -> SetItemSelectedImage(roots[it -> m_Level], IMG_Folder); - imaged[it -> m_Level] = TRUE; - } - } - - 
m_ContentsBox -> Expand(roots[0]); -} - - - - -void wxHtmlHelpController::CreateIndex() -{ - m_IndexBox -> Clear(); - - for (int i = 0; i < m_IndexCnt; i++) - m_IndexBox -> Append(m_Index[i].m_Name, (char*)(m_Index + i)); -} - - - -void wxHtmlHelpController::RefreshLists() -{ - if (m_Frame) { - CreateContents(); - CreateIndex(); - m_SearchList -> Clear(); - } -} - - - - - - - -void wxHtmlHelpController::ReadCustomization(wxConfigBase *cfg, wxString path) -{ - wxString oldpath; - wxString tmp; - - if (path != wxEmptyString) { - oldpath = cfg -> GetPath(); - cfg -> SetPath(path); - } - - m_Cfg.navig_on = cfg -> Read("hcNavigPanel", m_Cfg.navig_on) != 0; - m_Cfg.sashpos = cfg -> Read("hcSashPos", m_Cfg.sashpos); - m_Cfg.x = cfg -> Read("hcX", m_Cfg.x); - m_Cfg.y = cfg -> Read("hcY", m_Cfg.y); - m_Cfg.w = cfg -> Read("hcW", m_Cfg.w); - m_Cfg.h = cfg -> Read("hcH", m_Cfg.h); - - if (path != wxEmptyString) - cfg -> SetPath(oldpath); -} - - - -void wxHtmlHelpController::WriteCustomization(wxConfigBase *cfg, wxString path) -{ - wxString oldpath; - wxString tmp; - - if (path != wxEmptyString) { - oldpath = cfg -> GetPath(); - cfg -> SetPath(path); - } - - cfg -> Write("hcNavigPanel", m_Cfg.navig_on); - cfg -> Write("hcSashPos", (long)m_Cfg.sashpos); - cfg -> Write("hcX", (long)m_Cfg.x); - cfg -> Write("hcY", (long)m_Cfg.y); - cfg -> Write("hcW", (long)m_Cfg.w); - cfg -> Write("hcH", (long)m_Cfg.h); - - if (path != wxEmptyString) - cfg -> SetPath(oldpath); -} - - - - - -/* -EVENT HANDLING : -*/ - - -void wxHtmlHelpController::OnToolbar(wxCommandEvent& event) -{ - switch (event.GetId()) { - case wxID_HTML_BACK : - m_HtmlWin -> HistoryBack(); - break; - case wxID_HTML_FORWARD : - m_HtmlWin -> HistoryForward(); - break; - case wxID_HTML_PANEL : - if (m_Splitter -> IsSplit()) { - m_Cfg.sashpos = m_Splitter -> GetSashPosition(); - m_Splitter -> Unsplit(m_NavigPan); - } - else { - m_NavigPan -> Show(TRUE); - m_HtmlWin -> Show(TRUE); - m_Splitter -> SplitVertically(m_NavigPan, 
m_HtmlWin, m_Cfg.sashpos); - } - break; - } -} - - - -void wxHtmlHelpController::OnContentsSel(wxTreeEvent& event) -{ - wxHtmlHelpTreeItemData *pg; - - pg = (wxHtmlHelpTreeItemData*) m_ContentsBox -> GetItemData(event.GetItem()); - if (pg) m_HtmlWin -> LoadPage(pg -> GetPage()); -} - - - -void wxHtmlHelpController::OnIndexSel(wxCommandEvent& event) -{ - HtmlContentsItem *it = (HtmlContentsItem*) m_IndexBox -> GetClientData(m_IndexBox -> GetSelection()); - if (it) m_HtmlWin -> LoadPage(it -> m_Book -> GetBasePath() + it -> m_Page); -} - - - -void wxHtmlHelpController::OnSearchSel(wxCommandEvent& event) -{ - HtmlContentsItem *it = (HtmlContentsItem*) m_SearchList -> GetClientData(m_SearchList -> GetSelection()); - if (it) m_HtmlWin -> LoadPage(it -> m_Book -> GetBasePath() + it -> m_Page); -} - - - -void wxHtmlHelpController::OnCloseWindow(wxCloseEvent& event) -{ - int a, b; - - m_Cfg.navig_on = m_Splitter -> IsSplit(); - if (m_Cfg.navig_on) - m_Cfg.sashpos = m_Splitter -> GetSashPosition(); - m_Frame -> GetPosition(&a, &b); - m_Cfg.x = a, m_Cfg.y = b; - m_Frame -> GetSize(&a, &b); - m_Cfg.w = a, m_Cfg.h = b; - - if (m_Config) { - WriteCustomization(m_Config, m_ConfigRoot); - m_HtmlWin -> WriteCustomization(m_Config, m_ConfigRoot); - } - m_Frame = NULL; - - event.Skip(); -} - - - -void wxHtmlHelpController::OnSearch(wxCommandEvent& event) -{ - wxString sr = m_SearchText -> GetLineText(0); - - if (sr != wxEmptyString) KeywordSearch(sr); -} - - - -BEGIN_EVENT_TABLE(wxHtmlHelpController, wxEvtHandler) - EVT_TOOL_RANGE(wxID_HTML_PANEL, wxID_HTML_FORWARD, wxHtmlHelpController::OnToolbar) - EVT_TREE_SEL_CHANGED(wxID_HTML_TREECTRL, wxHtmlHelpController::OnContentsSel) - EVT_LISTBOX(wxID_HTML_INDEXLIST, wxHtmlHelpController::OnIndexSel) - EVT_LISTBOX(wxID_HTML_SEARCHLIST, wxHtmlHelpController::OnSearchSel) - EVT_CLOSE(wxHtmlHelpController::OnCloseWindow) - EVT_BUTTON(wxID_HTML_SEARCHBUTTON, wxHtmlHelpController::OnSearch) - EVT_TEXT_ENTER(wxID_HTML_SEARCHTEXT, 
wxHtmlHelpController::OnSearch) -END_EVENT_TABLE() - - - -#endif - diff --git a/src/html/search.cpp b/src/html/search.cpp deleted file mode 100644 index 85a59a2b2e..0000000000 --- a/src/html/search.cpp +++ /dev/null @@ -1,72 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Name: search.cpp -// Purpose: search engine -// Author: Vaclav Slavik -// RCS-ID: $Id$ -// Copyright: (c) 1999 Vaclav Slavik -// Licence: wxWindows Licence -///////////////////////////////////////////////////////////////////////////// - - - -#ifdef __GNUG__ -#pragma implementation -#endif - -#include "wx/wxprec.h" - -#include "wx/defs.h" -#if wxUSE_HTML - -#ifdef __BORDLANDC__ -#pragma hdrstop -#endif - -#ifndef WXPRECOMP -#endif - -#include "wx/html/helpdata.h" - - -//-------------------------------------------------------------------------------- -// wxSearchEngine -//-------------------------------------------------------------------------------- - -void wxSearchEngine::LookFor(const wxString& keyword) -{ - if (m_Keyword) delete[] m_Keyword; - m_Keyword = new wxChar[keyword.Length() + 1]; - wxStrcpy(m_Keyword, keyword.c_str()); - for (int i = wxStrlen(m_Keyword) - 1; i >= 0; i--) - if ((m_Keyword[i] >= wxT('A')) && (m_Keyword[i] <= wxT('Z'))) - m_Keyword[i] += wxT('a') - wxT('A'); -} - - - -bool wxSearchEngine::Scan(wxInputStream *stream) -{ - wxASSERT_MSG(m_Keyword != NULL, _("wxSearchEngine::LookFor must be called before scanning!")); - - int i, j; - int lng = stream ->GetSize(); - int wrd = wxStrlen(m_Keyword); - bool found = FALSE; - char *buf = new char[lng + 1]; - stream -> Read(buf, lng); - buf[lng] = 0; - - for (i = 0; i < lng; i++) - if ((buf[i] >= 'A') && (buf[i] <= 'Z')) buf[i] += 'a' - 'A'; - - for (i = 0; i < lng - wrd; i++) { - j = 0; - while ((j < wrd) && (buf[i + j] == m_Keyword[j])) j++; - if (j == wrd) {found = TRUE; break;} - } - - delete[] buf; - return found; -} - -#endif diff --git a/src/png/makefile.nt b/src/png/makefile.nt 
deleted file mode 100644 index ab823c8fc9..0000000000 --- a/src/png/makefile.nt +++ /dev/null @@ -1,68 +0,0 @@ -# -# File: makefile.nt -# Author: Julian Smart -# Created: 1993 -# Updated: -# Copyright: (c) 1993, AIAI, University of Edinburgh -# -# "%W% %G%" -# -# Makefile : Builds winpng.lib library for Windows 3.1 - -# Change WXDIR or WXWIN to wherever wxWindows is found -WXDIR = $(WXWIN) -WXLIB = $(WXDIR)\lib\wx.lib -WXINC = $(WXDIR)\include - -WINPNGDIR = ..\png -WINPNGINC = $(WINPNGDIR) -WINPNGLIB = ..\..\lib\winpng.lib - -INC = /I..\zlib - -FINAL=1 - -# Set this to nothing if your compiler is MS C++ 7 -ZOPTION= - -!ifndef FINAL -FINAL=0 -!endif - -PRECOMP=/YuWX.H - -!if "$(FINAL)" == "0" -OPT = /Od -CPPFLAGS= /W4 /Zi /MD /GX- $(ZOPTION) $(OPT) /Dwx_msw $(INC) # $(PRECOMP) /Fp$(WXDIR)\src\msw\wx.pch -CFLAGS= /W4 /Zi /MD /GX- /Od /Dwx_msw $(INC) -LINKFLAGS=/NOD /CO /ONERROR:NOEXE -!else -# /Ox for real FINAL version -OPT = /O2 -CPPFLAGS= /W4 /MD /GX- /Dwx_msw $(INC) # $(PRECOMP) /Fp$(WXDIR)\src\msw\wx.pch -CFLAGS= /W4 /MD /GX- /Dwx_msw $(INC) -LINKFLAGS=/NOD /ONERROR:NOEXE -!endif - -OBJECTS = png.obj pngread.obj pngrtran.obj pngrutil.obj \ - pngpread.obj pngtrans.obj pngwrite.obj pngwtran.obj pngwutil.obj \ - pngerror.obj pngmem.obj pngwio.obj pngrio.obj pngget.obj pngset.obj - -all: $(WINPNGLIB) - -$(WINPNGLIB): $(OBJECTS) - erase $(WINPNGLIB) - lib @<< --out:$(WINPNGLIB) -$(OBJECTS) -<< - -.c.obj: - cl -DWIN32 $(OPT) $(CFLAGS) /c $*.c - -clean: - erase *.obj - erase *.exe - erase *.lib - -cleanall: clean diff --git a/src/regex/COPYRIGHT b/src/regex/COPYRIGHT index 65aaadd6cf..30c1f7a488 100644 --- a/src/regex/COPYRIGHT +++ b/src/regex/COPYRIGHT @@ -1,166 +1,20 @@ -This regular expression package was originally developed by Henry Spencer. -It bears the following copyright notice: +Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved. 
+This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. -********************************************************************** +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: -Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. -Development of this software was funded, in part, by Cray Research Inc., -UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics -Corporation, none of whom are responsible for the results. The author -thanks all of them. +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. -Redistribution and use in source and binary forms -- with or without -modification -- are permitted for any purpose, provided that -redistributions in source form retain this entire copyright notice and -indicate the origin and nature of any modifications. - -I'd appreciate being given credit for this package in the documentation -of software which uses it, but that is not a requirement. - -THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL -HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -********************************************************************** - -PostgreSQL adopted the code out of Tcl 8.4.1. Portions of regc_locale.c -and re_syntax.n were developed by Tcl developers other than Henry; these -files bear the Tcl copyright and license notice: - -********************************************************************** - -This software is copyrighted by the Regents of the University of -California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState -Corporation and other parties. The following terms apply to all files -associated with the software unless explicitly disclaimed in -individual files. - -The authors hereby grant permission to use, copy, modify, distribute, -and license this software and its documentation for any purpose, provided -that existing copyright notices are retained in all copies and that this -notice is included verbatim in any distributions. No written agreement, -license, or royalty fee is required for any of the authorized uses. -Modifications to this software may be copyrighted by their authors -and need not follow the licensing terms described here, provided that -the new terms are clearly indicated on the first page of each file where -they apply. 
- -IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY -FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES -ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY -DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE -IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE -NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR -MODIFICATIONS. - -GOVERNMENT USE: If you are acquiring this software on behalf of the -U.S. government, the Government shall have only "Restricted Rights" -in the software and related documentation as defined in the Federal -Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you -are acquiring the software on behalf of the Department of Defense, the -software shall be classified as "Commercial Computer Software" and the -Government shall have only "Restricted Rights" as defined in Clause -252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the -authors grant the U.S. Government and others acting in its behalf -permission to use and distribute the software in accordance with the -terms specified in this license. - -********************************************************************** - -Subsequent modifications to the code by the PostgreSQL project follow -the same license terms as the rest of PostgreSQL. 
-(License follows) -**************************************************************************** -PostgreSQL Database Management System -(formerly known as Postgres, then as Postgres95) - -Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group - -Portions Copyright (c) 1994, The Regents of the University of California - -Permission to use, copy, modify, and distribute this software and its -documentation for any purpose, without fee, and without a written agreement -is hereby granted, provided that the above copyright notice and this -paragraph and the following two paragraphs appear in all copies. - -IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR -DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING -LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS -DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS -ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO -PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. -**************************************************************************** -And if that's not enough, changes made from wxWindows are put under the -wxWindows license: -**************************************************************************** - wxWindows Library Licence, Version 3 - ==================================== - - Copyright (C) 1998 Julian Smart, Robert Roebling [, ...] - - Everyone is permitted to copy and distribute verbatim copies - of this licence document, but changing it is not allowed. 
- - WXWINDOWS LIBRARY LICENCE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - This library is free software; you can redistribute it and/or modify it - under the terms of the GNU Library General Public Licence as published by - the Free Software Foundation; either version 2 of the Licence, or (at - your option) any later version. - - This library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library - General Public Licence for more details. - - You should have received a copy of the GNU Library General Public Licence - along with this software, usually in a file named COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, - Boston, MA 02111-1307 USA. - - EXCEPTION NOTICE - - 1. As a special exception, the copyright holders of this library give - permission for additional uses of the text contained in this release of - the library as licenced under the wxWindows Library Licence, applying - either version 3 of the Licence, or (at your option) any later version of - the Licence as published by the copyright holders of version 3 of the - Licence document. - - 2. The exception is that you may use, copy, link, modify and distribute - under the user's own terms, binary object code versions of works based - on the Library. - - 3. If you copy code from files distributed under the terms of the GNU - General Public Licence or the GNU Library General Public Licence into a - copy of this library, as this licence permits, the exception does not - apply to the code that you add in this way. To avoid misleading anyone as - to the status of such modified files, you must delete this exception - notice from such code and/or adjust the licensing conditions notice - accordingly. - - 4. 
If you write modifications of your own for this library, it is your - choice whether to permit this exception to apply to your modifications. - If you do not wish that, you must delete the exception notice from such - code and/or adjust the licensing conditions notice accordingly. -**************************************************************************** +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. +4. This notice may not be removed or altered. diff --git a/src/regex/Makefile b/src/regex/Makefile index cb76d37ea7..ce20561fa9 100644 --- a/src/regex/Makefile +++ b/src/regex/Makefile @@ -1,28 +1,130 @@ -#------------------------------------------------------------------------- -# -# Makefile-- -# Makefile for backend/regex -# -# IDENTIFICATION -# $Header: /projects/cvsroot/pgsql-server/src/backend/regex/Makefile,v 1.20 2003/02/05 17:41:32 tgl Exp $ -# -#------------------------------------------------------------------------- +# You probably want to take -DREDEBUG out of CFLAGS, and put something like +# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of +# internal assertion checking and some debugging facilities). +# Put -Dconst= in for a pre-ANSI compiler. +# Do not take -DPOSIX_MISTAKE out. +# REGCFLAGS isn't important to you (it's for my use in some special contexts). +CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS) -subdir = src/backend/regex -top_builddir = ../../.. -include $(top_builddir)/src/Makefile.global +# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want +# the Berkeley __P macro, put -b in. +MKHFLAGS= -OBJS = regcomp.o regerror.o regexec.o regfree.o +# Flags for linking but not compiling, if any. +LDFLAGS= -all: SUBSYS.o +# Extra libraries for linking, if any. 
+LIBS= -SUBSYS.o: $(OBJS) - $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) +# Internal stuff, should not need changing. +OBJPRODN=regcomp.o regexec.o regerror.o regfree.o +OBJS=$(OBJPRODN) split.o debug.o re_main.o +H=cclass.h cname.h regex2.h utils.h +REGSRC=regcomp.c regerror.c regexec.c regfree.c +ALLSRC=$(REGSRC) engine.c debug.c re_main.c split.c -# mark inclusion dependencies between .c files explicitly -regcomp.o: regcomp.c regc_lex.c regc_color.c regc_nfa.c regc_cvec.c regc_locale.c +# Stuff that matters only if you're trying to lint the package. +LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG +LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c re_main.c +JUNKLINT=possible pointer alignment|null effect -regexec.o: regexec.c rege_dfa.c +# arrangements to build forward-reference header files +.SUFFIXES: .ih .h +.c.ih: + sh ./mkh $(MKHFLAGS) -p $< >$@ -clean: - rm -f SUBSYS.o $(OBJS) +default: r + +lib: purge $(OBJPRODN) + rm -f libregex.a + ar crv libregex.a $(OBJPRODN) + +purge: + rm -f *.o + +# stuff to build regex.h +REGEXH=regex.h +REGEXHSRC=regex2.h $(REGSRC) +$(REGEXH): $(REGEXHSRC) mkh + sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp + cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h + rm -f regex.tmp + +# dependencies +$(OBJPRODN) debug.o: utils.h regex.h regex2.h +regcomp.o: cclass.h cname.h regcomp.ih +regexec.o: engine.c engine.ih +regerror.o: regerror.ih +debug.o: debug.ih +re_main.o: re_main.ih + +# tester +re: $(OBJS) + $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ + +# regression test +r: re tests + ./re &1 | egrep -v '$(JUNKLINT)' | tee lint + +fullprint: + ti README WHATSNEW notes todo | list + ti *.h | list + list *.c + list regex.3 regex.7 + +print: + ti README WHATSNEW notes todo | list + ti *.h | list + list reg*.c engine.c + + +mf.tmp: Makefile + sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@ + +DTRH=cclass.h cname.h regex2.h utils.h +PRE=COPYRIGHT README WHATSNEW +POST=mkh regex.3 regex.7 tests 
$(DTRH) $(ALLSRC) fake/*.[ch] +FILES=$(PRE) Makefile $(POST) +DTR=$(PRE) Makefile=mf.tmp $(POST) +dtr: $(FILES) mf.tmp + makedtr $(DTR) >$@ + rm mf.tmp + +cio: $(FILES) + cio $(FILES) + +rdf: $(FILES) + rcsdiff -c $(FILES) 2>&1 | p + +# various forms of cleanup +tidy: + rm -f junk* core core.* *.core dtr *.tmp lint + +clean: tidy + rm -f *.o *.s *.ih re libregex.a + +# don't do this one unless you know what you're doing +spotless: clean + rm -f mkh regex.h diff --git a/src/regex/cclass.h b/src/regex/cclass.h new file mode 100644 index 0000000000..2b50a76197 --- /dev/null +++ b/src/regex/cclass.h @@ -0,0 +1,20 @@ +/* character-class table */ +static struct cclass { + char *name; + char *chars; + char *multis; +} cclasses[] = { + { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "" }, + { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "" }, + { "blank", " \t", "" }, + { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", "" }, + { "digit", "0123456789", "" }, + { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" }, + { "lower", "abcdefghijklmnopqrstuvwxyz", "" }, + { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", "" }, + { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" }, + { "space", "\t\n\v\f\r ", "" }, + { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "" }, + { "xdigit", "0123456789ABCDEFabcdef", "" }, + { NULL, 0, "" } +}; diff --git a/src/regex/cname.h b/src/regex/cname.h new file mode 100644 index 0000000000..c1a6dd5656 --- /dev/null +++ b/src/regex/cname.h @@ -0,0 +1,102 @@ +/* character-name table */ +static struct cname { + char *name; + char code; +} cnames[] = { + { "NUL", '\0' }, + { "SOH", '\001' }, + { "STX", '\002' }, + { "ETX", '\003' }, + { "EOT", '\004' }, + { "ENQ", '\005' }, + { "ACK", '\006' }, + { "BEL", '\007' }, + { "alert", '\007' }, 
+ { "BS", '\010' }, + { "backspace", '\b' }, + { "HT", '\011' }, + { "tab", '\t' }, + { "LF", '\012' }, + { "newline", '\n' }, + { "VT", '\013' }, + { "vertical-tab", '\v' }, + { "FF", '\014' }, + { "form-feed", '\f' }, + { "CR", '\015' }, + { "carriage-return", '\r' }, + { "SO", '\016' }, + { "SI", '\017' }, + { "DLE", '\020' }, + { "DC1", '\021' }, + { "DC2", '\022' }, + { "DC3", '\023' }, + { "DC4", '\024' }, + { "NAK", '\025' }, + { "SYN", '\026' }, + { "ETB", '\027' }, + { "CAN", '\030' }, + { "EM", '\031' }, + { "SUB", '\032' }, + { "ESC", '\033' }, + { "IS4", '\034' }, + { "FS", '\034' }, + { "IS3", '\035' }, + { "GS", '\035' }, + { "IS2", '\036' }, + { "RS", '\036' }, + { "IS1", '\037' }, + { "US", '\037' }, + { "space", ' ' }, + { "exclamation-mark", '!' }, + { "quotation-mark", '"' }, + { "number-sign", '#' }, + { "dollar-sign", '$' }, + { "percent-sign", '%' }, + { "ampersand", '&' }, + { "apostrophe", '\'' }, + { "left-parenthesis", '(' }, + { "right-parenthesis", ')' }, + { "asterisk", '*' }, + { "plus-sign", '+' }, + { "comma", ',' }, + { "hyphen", '-' }, + { "hyphen-minus", '-' }, + { "period", '.' }, + { "full-stop", '.' }, + { "slash", '/' }, + { "solidus", '/' }, + { "zero", '0' }, + { "one", '1' }, + { "two", '2' }, + { "three", '3' }, + { "four", '4' }, + { "five", '5' }, + { "six", '6' }, + { "seven", '7' }, + { "eight", '8' }, + { "nine", '9' }, + { "colon", ':' }, + { "semicolon", ';' }, + { "less-than-sign", '<' }, + { "equals-sign", '=' }, + { "greater-than-sign", '>' }, + { "question-mark", '?' 
}, + { "commercial-at", '@' }, + { "left-square-bracket", '[' }, + { "backslash", '\\' }, + { "reverse-solidus", '\\' }, + { "right-square-bracket", ']' }, + { "circumflex", '^' }, + { "circumflex-accent", '^' }, + { "underscore", '_' }, + { "low-line", '_' }, + { "grave-accent", '`' }, + { "left-brace", '{' }, + { "left-curly-bracket", '{' }, + { "vertical-line", '|' }, + { "right-brace", '}' }, + { "right-curly-bracket", '}' }, + { "tilde", '~' }, + { "DEL", '\177' }, + { NULL, 0 }, +}; diff --git a/src/regex/debug.c b/src/regex/debug.c new file mode 100644 index 0000000000..bf40bbb3bd --- /dev/null +++ b/src/regex/debug.c @@ -0,0 +1,242 @@ +#include +#include +#include +#include +#include +#include +#include "regex.h" + +#include "utils.h" +#include "regex2.h" +#include "debug.ih" + +/* + - regprint - print a regexp for debugging + == void regprint(regex_t *r, FILE *d); + */ +void +regprint(r, d) +regex_t *r; +FILE *d; +{ + register struct re_guts *g = r->re_g; + register int i; + register int c; + register int last; + int nincat[NC]; + + fprintf(d, "%ld states, %d categories", (long)g->nstates, + g->ncategories); + fprintf(d, ", first %ld last %ld", (long)g->firststate, + (long)g->laststate); + if (g->iflags&USEBOL) + fprintf(d, ", USEBOL"); + if (g->iflags&USEEOL) + fprintf(d, ", USEEOL"); + if (g->iflags&BAD) + fprintf(d, ", BAD"); + if (g->nsub > 0) + fprintf(d, ", nsub=%ld", (long)g->nsub); + if (g->must != NULL) + fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, + g->must); + if (g->backrefs) + fprintf(d, ", backrefs"); + if (g->nplus > 0) + fprintf(d, ", nplus %ld", (long)g->nplus); + fprintf(d, "\n"); + s_print(g, d); + for (i = 0; i < g->ncategories; i++) { + nincat[i] = 0; + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (g->categories[c] == i) + nincat[i]++; + } + fprintf(d, "cc0#%d", nincat[0]); + for (i = 1; i < g->ncategories; i++) + if (nincat[i] == 1) { + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (g->categories[c] == i) + break; + 
fprintf(d, ", %d=%s", i, regchar(c)); + } + fprintf(d, "\n"); + for (i = 1; i < g->ncategories; i++) + if (nincat[i] != 1) { + fprintf(d, "cc%d\t", i); + last = -1; + for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ + if (c <= CHAR_MAX && g->categories[c] == i) { + if (last < 0) { + fprintf(d, "%s", regchar(c)); + last = c; + } + } else { + if (last >= 0) { + if (last != c-1) + fprintf(d, "-%s", + regchar(c-1)); + last = -1; + } + } + fprintf(d, "\n"); + } +} + +/* + - s_print - print the strip for debugging + == static void s_print(register struct re_guts *g, FILE *d); + */ +static void +s_print(g, d) +register struct re_guts *g; +FILE *d; +{ + register sop *s; + register cset *cs; + register int i; + register int done = 0; + register sop opnd; + register int col = 0; + register int last; + register sopno offset = 2; +# define GAP() { if (offset % 5 == 0) { \ + if (col > 40) { \ + fprintf(d, "\n\t"); \ + col = 0; \ + } else { \ + fprintf(d, " "); \ + col++; \ + } \ + } else \ + col++; \ + offset++; \ + } + + if (OP(g->strip[0]) != OEND) + fprintf(d, "missing initial OEND!\n"); + for (s = &g->strip[1]; !done; s++) { + opnd = OPND(*s); + switch (OP(*s)) { + case OEND: + fprintf(d, "\n"); + done = 1; + break; + case OCHAR: + if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) + fprintf(d, "\\%c", (char)opnd); + else + fprintf(d, "%s", regchar((char)opnd)); + break; + case OBOL: + fprintf(d, "^"); + break; + case OEOL: + fprintf(d, "$"); + break; + case OBOW: + fprintf(d, "\\{"); + break; + case OEOW: + fprintf(d, "\\}"); + break; + case OANY: + fprintf(d, "."); + break; + case OANYOF: + fprintf(d, "[(%ld)", (long)opnd); + cs = &g->sets[opnd]; + last = -1; + for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ + if (CHIN(cs, i) && i < g->csetsize) { + if (last < 0) { + fprintf(d, "%s", regchar(i)); + last = i; + } + } else { + if (last >= 0) { + if (last != i-1) + fprintf(d, "-%s", + regchar(i-1)); + last = -1; + } + } + fprintf(d, "]"); + break; + case 
OBACK_: + fprintf(d, "(\\<%ld>", (long)opnd); + break; + case O_BACK: + fprintf(d, "<%ld>\\)", (long)opnd); + break; + case OPLUS_: + fprintf(d, "(+"); + if (OP(*(s+opnd)) != O_PLUS) + fprintf(d, "<%ld>", (long)opnd); + break; + case O_PLUS: + if (OP(*(s-opnd)) != OPLUS_) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, "+)"); + break; + case OQUEST_: + fprintf(d, "(?"); + if (OP(*(s+opnd)) != O_QUEST) + fprintf(d, "<%ld>", (long)opnd); + break; + case O_QUEST: + if (OP(*(s-opnd)) != OQUEST_) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, "?)"); + break; + case OLPAREN: + fprintf(d, "((<%ld>", (long)opnd); + break; + case ORPAREN: + fprintf(d, "<%ld>))", (long)opnd); + break; + case OCH_: + fprintf(d, "<"); + if (OP(*(s+opnd)) != OOR2) + fprintf(d, "<%ld>", (long)opnd); + break; + case OOR1: + if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, "|"); + break; + case OOR2: + fprintf(d, "|"); + if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) + fprintf(d, "<%ld>", (long)opnd); + break; + case O_CH: + if (OP(*(s-opnd)) != OOR1) + fprintf(d, "<%ld>", (long)opnd); + fprintf(d, ">"); + break; + default: + fprintf(d, "!%d(%d)!", OP(*s), opnd); + break; + } + if (!done) + GAP(); + } +} + +/* + - regchar - make a character printable + == static char *regchar(int ch); + */ +static char * /* -> representation */ +regchar(ch) +int ch; +{ + static char buf[10]; + + if (isprint(ch) || ch == ' ') + sprintf(buf, "%c", ch); + else + sprintf(buf, "\\%o", ch); + return(buf); +} diff --git a/src/regex/engine.c b/src/regex/engine.c new file mode 100644 index 0000000000..0b88dcf1ed --- /dev/null +++ b/src/regex/engine.c @@ -0,0 +1,1019 @@ +/* + * The matching engine and friends. This file is #included by regexec.c + * after suitable #defines of a variety of macros used herein, so that + * different state representations can be used without duplicating masses + * of code. 
+ */ + +#ifdef SNAMES +#define matcher smatcher +#define fast sfast +#define slow sslow +#define dissect sdissect +#define backref sbackref +#define step sstep +#define print sprint +#define at sat +#define match smat +#endif +#ifdef LNAMES +#define matcher lmatcher +#define fast lfast +#define slow lslow +#define dissect ldissect +#define backref lbackref +#define step lstep +#define print lprint +#define at lat +#define match lmat +#endif + +/* another structure passed up and down to avoid zillions of parameters */ +struct match { + struct re_guts *g; + int eflags; + regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ + char *offp; /* offsets work from here */ + char *beginp; /* start of string -- virtual NUL precedes */ + char *endp; /* end of string -- virtual NUL here */ + char *coldp; /* can be no match starting before here */ + char **lastpos; /* [nplus+1] */ + STATEVARS; + states st; /* current states */ + states fresh; /* states for a fresh start */ + states tmp; /* temporary */ + states empty; /* empty set of states */ +}; + +#include "engine.ih" + +#ifdef REDEBUG +#define SP(t, s, c) print(m, t, s, c, stdout) +#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) +#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); } +#else +#define SP(t, s, c) /* nothing */ +#define AT(t, p1, p2, s1, s2) /* nothing */ +#define NOTE(s) /* nothing */ +#endif + +/* + - matcher - the actual matching engine + == static int matcher(register struct re_guts *g, char *string, \ + == size_t nmatch, regmatch_t pmatch[], int eflags); + */ +static int /* 0 success, REG_NOMATCH failure */ +matcher(g, string, nmatch, pmatch, eflags) +register struct re_guts *g; +char *string; +size_t nmatch; +regmatch_t pmatch[]; +int eflags; +{ + register char *endp; + register size_t i; + struct match mv; + register struct match *m = &mv; + register char *dp; + const register sopno gf = g->firststate+1; /* +1 for OEND */ + const register sopno gl = g->laststate; + char *start; + 
char *stop; + + /* simplify the situation where possible */ + if (g->cflags&REG_NOSUB) + nmatch = 0; + if (eflags&REG_STARTEND) { + start = string + pmatch[0].rm_so; + stop = string + pmatch[0].rm_eo; + } else { + start = string; + stop = start + strlen(start); + } + if (stop < start) + return(REG_INVARG); + + /* prescreening; this does wonders for this rather slow code */ + if (g->must != NULL) { + for (dp = start; dp < stop; dp++) + if (*dp == g->must[0] && stop - dp >= g->mlen && + memcmp(dp, g->must, (size_t)g->mlen) == 0) + break; + if (dp == stop) /* we didn't find g->must */ + return(REG_NOMATCH); + } + + /* match struct setup */ + m->g = g; + m->eflags = eflags; + m->pmatch = NULL; + m->lastpos = NULL; + m->offp = string; + m->beginp = start; + m->endp = stop; + STATESETUP(m, 4); + SETUP(m->st); + SETUP(m->fresh); + SETUP(m->tmp); + SETUP(m->empty); + CLEAR(m->empty); + + /* this loop does only one repetition except for backrefs */ + for (;;) { + endp = fast(m, start, stop, gf, gl); + if (endp == NULL) { /* a miss */ + STATETEARDOWN(m); + return(REG_NOMATCH); + } + if (nmatch == 0 && !g->backrefs) + break; /* no further info needed */ + + /* where? */ + assert(m->coldp != NULL); + for (;;) { + NOTE("finding start"); + endp = slow(m, m->coldp, stop, gf, gl); + if (endp != NULL) + break; + assert(m->coldp < m->endp); + m->coldp++; + } + if (nmatch == 1 && !g->backrefs) + break; /* no further info needed */ + + /* oh my, he wants the subexpressions... 
*/ + if (m->pmatch == NULL) + m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * + sizeof(regmatch_t)); + if (m->pmatch == NULL) { + STATETEARDOWN(m); + return(REG_ESPACE); + } + for (i = 1; i <= m->g->nsub; i++) + m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; + if (!g->backrefs && !(m->eflags&REG_BACKR)) { + NOTE("dissecting"); + dp = dissect(m, m->coldp, endp, gf, gl); + } else { + if (g->nplus > 0 && m->lastpos == NULL) + m->lastpos = (char **)malloc((g->nplus+1) * + sizeof(char *)); + if (g->nplus > 0 && m->lastpos == NULL) { + free(m->pmatch); + STATETEARDOWN(m); + return(REG_ESPACE); + } + NOTE("backref dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + } + if (dp != NULL) + break; + + /* uh-oh... we couldn't find a subexpression-level match */ + assert(g->backrefs); /* must be back references doing it */ + assert(g->nplus == 0 || m->lastpos != NULL); + for (;;) { + if (dp != NULL || endp <= m->coldp) + break; /* defeat */ + NOTE("backoff"); + endp = slow(m, m->coldp, endp-1, gf, gl); + if (endp == NULL) + break; /* defeat */ + /* try it on a shorter possibility */ +#ifndef NDEBUG + for (i = 1; i <= m->g->nsub; i++) { + assert(m->pmatch[i].rm_so == -1); + assert(m->pmatch[i].rm_eo == -1); + } +#endif + NOTE("backoff dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + } + assert(dp == NULL || dp == endp); + if (dp != NULL) /* found a shorter one */ + break; + + /* despite initial appearances, there is no match here */ + NOTE("false alarm"); + start = m->coldp + 1; /* recycle starting later */ + assert(start <= stop); + } + + /* fill in the details if requested */ + if (nmatch > 0) { + pmatch[0].rm_so = m->coldp - m->offp; + pmatch[0].rm_eo = endp - m->offp; + } + if (nmatch > 1) { + assert(m->pmatch != NULL); + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) + pmatch[i] = m->pmatch[i]; + else { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + } + + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + 
free((char *)m->lastpos); + STATETEARDOWN(m); + return(0); +} + +/* + - dissect - figure out what matched what, no back references + == static char *dissect(register struct match *m, char *start, \ + == char *stop, sopno startst, sopno stopst); + */ +static char * /* == stop (success) always */ +dissect(m, start, stop, startst, stopst) +register struct match *m; +char *start; +char *stop; +sopno startst; +sopno stopst; +{ + register int i; + register sopno ss; /* start sop of current subRE */ + register sopno es; /* end sop of current subRE */ + register char *sp; /* start of string matched by it */ + register char *stp; /* string matched by it cannot pass here */ + register char *rest; /* start of rest of string */ + register char *tail; /* string unmatched by rest of RE */ + register sopno ssub; /* start sop of subsubRE */ + register sopno esub; /* end sop of subsubRE */ + register char *ssp; /* start of string matched by subsubRE */ + register char *sep; /* end of string matched by subsubRE */ + register char *oldssp; /* previous ssp */ + register char *dp; + + AT("diss", start, stop, startst, stopst); + sp = start; + for (ss = startst; ss < stopst; ss = es) { + /* identify end of subRE */ + es = ss; + switch (OP(m->g->strip[es])) { + case OPLUS_: + case OQUEST_: + es += OPND(m->g->strip[es]); + break; + case OCH_: + while (OP(m->g->strip[es]) != O_CH) + es += OPND(m->g->strip[es]); + break; + } + es++; + + /* figure out what it matched */ + switch (OP(m->g->strip[ss])) { + case OEND: + assert(nope); + break; + case OCHAR: + sp++; + break; + case OBOL: + case OEOL: + case OBOW: + case OEOW: + break; + case OANY: + case OANYOF: + sp++; + break; + case OBACK_: + case O_BACK: + assert(nope); + break; + /* cases where length of match is hard to find */ + case OQUEST_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? 
*/ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + /* did innards match? */ + if (slow(m, sp, rest, ssub, esub) != NULL) { + dp = dissect(m, sp, rest, ssub, esub); + assert(dp == rest); + } else /* no */ + assert(sp == rest); + sp = rest; + break; + case OPLUS_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + ssp = sp; + oldssp = ssp; + for (;;) { /* find last match of innards */ + sep = slow(m, ssp, rest, ssub, esub); + if (sep == NULL || sep == ssp) + break; /* failed or matched null */ + oldssp = ssp; /* on to next try */ + ssp = sep; + } + if (sep == NULL) { + /* last successful match */ + sep = ssp; + ssp = oldssp; + } + assert(sep == rest); /* must exhaust substring */ + assert(slow(m, ssp, sep, ssub, esub) == rest); + dp = dissect(m, ssp, sep, ssub, esub); + assert(dp == sep); + sp = rest; + break; + case OCH_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! 
*/ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = ss + OPND(m->g->strip[ss]) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + if (slow(m, sp, rest, ssub, esub) == rest) + break; /* it matched all of it */ + /* that one missed, try next one */ + assert(OP(m->g->strip[esub]) == OOR1); + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + dp = dissect(m, sp, rest, ssub, esub); + assert(dp == rest); + sp = rest; + break; + case O_PLUS: + case O_QUEST: + case OOR1: + case OOR2: + case O_CH: + assert(nope); + break; + case OLPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_so = sp - m->offp; + break; + case ORPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_eo = sp - m->offp; + break; + default: /* uh oh */ + assert(nope); + break; + } + } + + assert(sp == stop); + return(sp); +} + +/* + - backref - figure out what matched what, figuring in back references + == static char *backref(register struct match *m, char *start, \ + == char *stop, sopno startst, sopno stopst, sopno lev); + */ +static char * /* == stop (success) or NULL (failure) */ +backref(m, start, stop, startst, stopst, lev) +register struct match *m; +char *start; +char *stop; +sopno startst; +sopno stopst; +sopno lev; /* PLUS nesting level */ +{ + register int i; + register sopno ss; /* start sop of current subRE */ + register char *sp; /* start of string matched by it */ + register sopno ssub; /* start sop of subsubRE */ + register sopno esub; /* end sop of subsubRE */ + register char *ssp; /* start of string matched by subsubRE */ + register char *dp; + register size_t len; + register int hard; + register sop s; + register regoff_t offsave; + 
register cset *cs; + + AT("back", start, stop, startst, stopst); + sp = start; + + /* get as far as we can with easy stuff */ + hard = 0; + for (ss = startst; !hard && ss < stopst; ss++) + switch (OP(s = m->g->strip[ss])) { + case OCHAR: + if (sp == stop || *sp++ != (char)OPND(s)) + return(NULL); + break; + case OANY: + if (sp == stop) + return(NULL); + sp++; + break; + case OANYOF: + cs = &m->g->sets[OPND(s)]; + if (sp == stop || !CHIN(cs, *sp++)) + return(NULL); + break; + case OBOL: + if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags&REG_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOL: + if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags&REG_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OBOW: + if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags&REG_NEWLINE)) || + (sp > m->beginp && + !ISWORD(*(sp-1))) ) && + (sp < m->endp && ISWORD(*sp)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOW: + if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags&REG_NEWLINE)) || + (sp < m->endp && !ISWORD(*sp)) ) && + (sp > m->beginp && ISWORD(*(sp-1))) ) + { /* yes */ } + else + return(NULL); + break; + case O_QUEST: + break; + case OOR1: /* matches null but needs to skip */ + ss++; + s = m->g->strip[ss]; + do { + assert(OP(s) == OOR2); + ss += OPND(s); + } while (OP(s = m->g->strip[ss]) != O_CH); + /* note that the ss++ gets us past the O_CH */ + break; + default: /* have to make a choice */ + hard = 1; + break; + } + if (!hard) { /* that was it! 
*/ + if (sp != stop) + return(NULL); + return(sp); + } + ss--; /* adjust for the for's final increment */ + + /* the hard stuff */ + AT("hard", sp, stop, ss, stopst); + s = m->g->strip[ss]; + switch (OP(s)) { + case OBACK_: /* the vilest depths */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + if (m->pmatch[i].rm_eo == -1) + return(NULL); + assert(m->pmatch[i].rm_so != -1); + len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; + assert(stop - m->beginp >= len); + if (sp > stop - len) + return(NULL); /* not enough left to match */ + ssp = m->offp + m->pmatch[i].rm_so; + if (memcmp(sp, ssp, len) != 0) + return(NULL); + while (m->g->strip[ss] != SOP(O_BACK, i)) + ss++; + return(backref(m, sp+len, stop, ss+1, stopst, lev)); + break; + case OQUEST_: /* to null or not */ + dp = backref(m, sp, stop, ss+1, stopst, lev); + if (dp != NULL) + return(dp); /* not */ + return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); + break; + case OPLUS_: + assert(m->lastpos != NULL); + assert(lev+1 <= m->g->nplus); + m->lastpos[lev+1] = sp; + return(backref(m, sp, stop, ss+1, stopst, lev+1)); + break; + case O_PLUS: + if (sp == m->lastpos[lev]) /* last pass matched null */ + return(backref(m, sp, stop, ss+1, stopst, lev-1)); + /* try another pass */ + m->lastpos[lev] = sp; + dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); + if (dp == NULL) + return(backref(m, sp, stop, ss+1, stopst, lev-1)); + else + return(dp); + break; + case OCH_: /* find the right one, if any */ + ssub = ss + 1; + esub = ss + OPND(s) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + dp = backref(m, sp, stop, ssub, esub, lev); + if (dp != NULL) + return(dp); + /* that one missed, try next one */ + if (OP(m->g->strip[esub]) == O_CH) + return(NULL); /* there is none */ + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + 
} + break; + case OLPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_so; + m->pmatch[i].rm_so = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_so = offsave; + return(NULL); + break; + case ORPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_eo; + m->pmatch[i].rm_eo = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_eo = offsave; + return(NULL); + break; + default: /* uh oh */ + assert(nope); + break; + } + + /* "can't happen" */ + assert(nope); + /* NOTREACHED */ + return((char *)NULL); /* dummy */ +} + +/* + - fast - step through the string at top speed + == static char *fast(register struct match *m, char *start, \ + == char *stop, sopno startst, sopno stopst); + */ +static char * /* where tentative match ended, or NULL */ +fast(m, start, stop, startst, stopst) +register struct match *m; +char *start; +char *stop; +sopno startst; +sopno stopst; +{ + register states st = m->st; + register states fresh = m->fresh; + register states tmp = m->tmp; + register char *p = start; + register int c = (start == m->beginp) ? OUT : *(start-1); + register int lastc; /* previous c */ + register int flagch; + register int i; + register char *coldp; /* last p after which no match was underway */ + + CLEAR(st); + SET1(st, startst); + st = step(m->g, startst, stopst, st, NOTHING, st); + ASSIGN(fresh, st); + SP("start", st, *p); + coldp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + if (EQ(st, fresh)) + coldp = p; + + /* is there an EOL and/or BOL between lastc and c? 
*/ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || + (lastc == OUT && !(m->eflags®_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags®_NEWLINE) || + (c == OUT && !(m->eflags®_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("boleol", st, c); + } + + /* how about a word boundary? */ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("boweow", st, c); + } + + /* are we done? */ + if (ISSET(st, stopst) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, fresh); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("aft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + assert(coldp != NULL); + m->coldp = coldp; + if (ISSET(st, stopst)) + return(p+1); + else + return(NULL); +} + +/* + - slow - step through the string more deliberately + == static char *slow(register struct match *m, char *start, \ + == char *stop, sopno startst, sopno stopst); + */ +static char * /* where it ended */ +slow(m, start, stop, startst, stopst) +register struct match *m; +char *start; +char *stop; +sopno startst; +sopno stopst; +{ + register states st = m->st; + register states empty = m->empty; + register states tmp = m->tmp; + register char *p = start; + register int c = (start == m->beginp) ? 
OUT : *(start-1); + register int lastc; /* previous c */ + register int flagch; + register int i; + register char *matchp; /* last p at which a match ended */ + + AT("slow", start, stop, startst, stopst); + CLEAR(st); + SET1(st, startst); + SP("sstart", st, *p); + st = step(m->g, startst, stopst, st, NOTHING, st); + matchp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || + (lastc == OUT && !(m->eflags®_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags®_NEWLINE) || + (c == OUT && !(m->eflags®_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboleol", st, c); + } + + /* how about a word boundary? */ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboweow", st, c); + } + + /* are we done? 
*/ + if (ISSET(st, stopst)) + matchp = p; + if (EQ(st, empty) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, empty); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("saft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + return(matchp); +} + + +/* + - step - map set of states reachable before char to set reachable after + == static states step(register struct re_guts *g, sopno start, sopno stop, \ + == register states bef, int ch, register states aft); + == #define BOL (OUT+1) + == #define EOL (BOL+1) + == #define BOLEOL (BOL+2) + == #define NOTHING (BOL+3) + == #define BOW (BOL+4) + == #define EOW (BOL+5) + == #define CODEMAX (BOL+5) // highest code used + == #define NONCHAR(c) ((c) > CHAR_MAX) + == #define NNONCHAR (CODEMAX-CHAR_MAX) + */ +static states +step(g, start, stop, bef, ch, aft) +register struct re_guts *g; +sopno start; /* start state within strip */ +sopno stop; /* state after stop state within strip */ +register states bef; /* states reachable before */ +int ch; /* character or NONCHAR code */ +register states aft; /* states already known reachable after */ +{ + register cset *cs; + register sop s; + register sopno pc; + register onestate here; /* note, macros know this name */ + register sopno look; + register long i; + + for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { + s = g->strip[pc]; + switch (OP(s)) { + case OEND: + assert(pc == stop-1); + break; + case OCHAR: + /* only characters can match */ + assert(!NONCHAR(ch) || ch != (char)OPND(s)); + if (ch == (char)OPND(s)) + FWD(aft, bef, 1); + break; + case OBOL: + if (ch == BOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OEOL: + if (ch == EOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OBOW: + if (ch == BOW) + FWD(aft, bef, 1); + break; + case OEOW: + if (ch == EOW) + FWD(aft, bef, 1); + break; + case OANY: + if (!NONCHAR(ch)) 
+ FWD(aft, bef, 1); + break; + case OANYOF: + cs = &g->sets[OPND(s)]; + if (!NONCHAR(ch) && CHIN(cs, ch)) + FWD(aft, bef, 1); + break; + case OBACK_: /* ignored here */ + case O_BACK: + FWD(aft, aft, 1); + break; + case OPLUS_: /* forward, this is just an empty */ + FWD(aft, aft, 1); + break; + case O_PLUS: /* both forward and back */ + FWD(aft, aft, 1); + i = ISSETBACK(aft, OPND(s)); + BACK(aft, aft, OPND(s)); + if (!i && ISSETBACK(aft, OPND(s))) { + /* oho, must reconsider loop body */ + pc -= OPND(s) + 1; + INIT(here, pc); + } + break; + case OQUEST_: /* two branches, both forward */ + FWD(aft, aft, 1); + FWD(aft, aft, OPND(s)); + break; + case O_QUEST: /* just an empty */ + FWD(aft, aft, 1); + break; + case OLPAREN: /* not significant here */ + case ORPAREN: + FWD(aft, aft, 1); + break; + case OCH_: /* mark the first two branches */ + FWD(aft, aft, 1); + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + break; + case OOR1: /* done a branch, find the O_CH */ + if (ISSTATEIN(aft, here)) { + for (look = 1; + OP(s = g->strip[pc+look]) != O_CH; + look += OPND(s)) + assert(OP(s) == OOR2); + FWD(aft, aft, look); + } + break; + case OOR2: /* propagate OCH_'s marking */ + FWD(aft, aft, 1); + if (OP(g->strip[pc+OPND(s)]) != O_CH) { + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + } + break; + case O_CH: /* just empty */ + FWD(aft, aft, 1); + break; + default: /* ooooops... 
*/ + assert(nope); + break; + } + } + + return(aft); +} + +#ifdef REDEBUG +/* + - print - print a set of states + == #ifdef REDEBUG + == static void print(struct match *m, char *caption, states st, \ + == int ch, FILE *d); + == #endif + */ +static void +print(m, caption, st, ch, d) +struct match *m; +char *caption; +states st; +int ch; +FILE *d; +{ + register struct re_guts *g = m->g; + register int i; + register int first = 1; + + if (!(m->eflags®_TRACE)) + return; + + fprintf(d, "%s", caption); + if (ch != '\0') + fprintf(d, " %s", pchar(ch)); + for (i = 0; i < g->nstates; i++) + if (ISSET(st, i)) { + fprintf(d, "%s%d", (first) ? "\t" : ", ", i); + first = 0; + } + fprintf(d, "\n"); +} + +/* + - at - print current situation + == #ifdef REDEBUG + == static void at(struct match *m, char *title, char *start, char *stop, \ + == sopno startst, sopno stopst); + == #endif + */ +static void +at(m, title, start, stop, startst, stopst) +struct match *m; +char *title; +char *start; +char *stop; +sopno startst; +sopno stopst; +{ + if (!(m->eflags®_TRACE)) + return; + + printf("%s %s-", title, pchar(*start)); + printf("%s ", pchar(*stop)); + printf("%ld-%ld\n", (long)startst, (long)stopst); +} + +#ifndef PCHARDONE +#define PCHARDONE /* never again */ +/* + - pchar - make a character printable + == #ifdef REDEBUG + == static char *pchar(int ch); + == #endif + * + * Is this identical to regchar() over in debug.c? Well, yes. But a + * duplicate here avoids having a debugging-capable regexec.o tied to + * a matching debug.o, and this is convenient. It all disappears in + * the non-debug compilation anyway, so it doesn't matter much. 
+ */ +static char * /* -> representation */ +pchar(ch) +int ch; +{ + static char pbuf[10]; + + if (isprint(ch) || ch == ' ') + sprintf(pbuf, "%c", ch); + else + sprintf(pbuf, "\\%o", ch); + return(pbuf); +} +#endif +#endif + +#undef matcher +#undef fast +#undef slow +#undef dissect +#undef backref +#undef step +#undef print +#undef at +#undef match diff --git a/src/regex/re_main.c b/src/regex/re_main.c new file mode 100644 index 0000000000..0221e7713d --- /dev/null +++ b/src/regex/re_main.c @@ -0,0 +1,510 @@ +#include +#include +#include +#include +#include + +#include "main.ih" + +char *progname; +int debug = 0; +int line = 0; +int status = 0; + +int copts = REG_EXTENDED; +int eopts = 0; +regoff_t startoff = 0; +regoff_t endoff = 0; + + +extern int split(); +extern void regprint(); + +/* + - main - do the simple case, hand off to regress() for regression + */ +main(argc, argv) +int argc; +char *argv[]; +{ + regex_t re; +# define NS 10 + regmatch_t subs[NS]; + char erbuf[100]; + int err; + size_t len; + int c; + int errflg = 0; + register int i; + extern int optind; + extern char *optarg; + + progname = argv[0]; + + while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) + switch (c) { + case 'c': /* compile options */ + copts = options('c', optarg); + break; + case 'e': /* execute options */ + eopts = options('e', optarg); + break; + case 'S': /* start offset */ + startoff = (regoff_t)atoi(optarg); + break; + case 'E': /* end offset */ + endoff = (regoff_t)atoi(optarg); + break; + case 'x': /* Debugging. 
*/ + debug++; + break; + case '?': + default: + errflg++; + break; + } + if (errflg) { + fprintf(stderr, "usage: %s ", progname); + fprintf(stderr, "[-c copt][-C][-d] [re]\n"); + exit(2); + } + + if (optind >= argc) { + regress(stdin); + exit(status); + } + + err = regcomp(&re, argv[optind++], copts); + if (err) { + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "error %s, %d/%d `%s'\n", + eprint(err), len, sizeof(erbuf), erbuf); + exit(status); + } + regprint(&re, stdout); + + if (optind >= argc) { + regfree(&re); + exit(status); + } + + if (eopts®_STARTEND) { + subs[0].rm_so = startoff; + subs[0].rm_eo = strlen(argv[optind]) - endoff; + } + err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); + if (err) { + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "error %s, %d/%d `%s'\n", + eprint(err), len, sizeof(erbuf), erbuf); + exit(status); + } + if (!(copts®_NOSUB)) { + len = (int)(subs[0].rm_eo - subs[0].rm_so); + if (subs[0].rm_so != -1) { + if (len != 0) + printf("match `%.*s'\n", len, + argv[optind] + subs[0].rm_so); + else + printf("match `'@%.1s\n", + argv[optind] + subs[0].rm_so); + } + for (i = 1; i < NS; i++) + if (subs[i].rm_so != -1) + printf("(%d) `%.*s'\n", i, + (int)(subs[i].rm_eo - subs[i].rm_so), + argv[optind] + subs[i].rm_so); + } + exit(status); +} + +/* + - regress - main loop of regression test + == void regress(FILE *in); + */ +void +regress(in) +FILE *in; +{ + char inbuf[1000]; +# define MAXF 10 + char *f[MAXF]; + int nf; + int i; + char erbuf[100]; + size_t ne; + char *badpat = "invalid regular expression"; +# define SHORT 10 + char *bpname = "REG_BADPAT"; + regex_t re; + + while (fgets(inbuf, sizeof(inbuf), in) != NULL) { + line++; + if (inbuf[0] == '#' || inbuf[0] == '\n') + continue; /* NOTE CONTINUE */ + inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ + if (debug) + fprintf(stdout, "%d:\n", line); + nf = split(inbuf, f, MAXF, "\t\t"); + if (nf < 3) { + fprintf(stderr, "bad input, line 
%d\n", line); + exit(1); + } + for (i = 0; i < nf; i++) + if (strcmp(f[i], "\"\"") == 0) + f[i] = ""; + if (nf <= 3) + f[3] = NULL; + if (nf <= 4) + f[4] = NULL; + try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); + if (opt('&', f[1])) /* try with either type of RE */ + try(f[0], f[1], f[2], f[3], f[4], + options('c', f[1]) &~ REG_EXTENDED); + } + + ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); + if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { + fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", + erbuf, badpat); + status = 1; + } + ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); + if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || + ne != strlen(badpat)+1) { + fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", + erbuf, SHORT-1, badpat); + status = 1; + } + ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); + if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { + fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", + erbuf, bpname); + status = 1; + } + re.re_endp = bpname; + ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); + if (atoi(erbuf) != (int)REG_BADPAT) { + fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", + erbuf, (long)REG_BADPAT); + status = 1; + } else if (ne != strlen(erbuf)+1) { + fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", + erbuf, (long)REG_BADPAT); + status = 1; + } +} + +/* + - try - try it, and report on problems + == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); + */ +void +try(f0, f1, f2, f3, f4, opts) +char *f0; +char *f1; +char *f2; +char *f3; +char *f4; +int opts; /* may not match f1 */ +{ + regex_t re; +# define NSUBS 10 + regmatch_t subs[NSUBS]; +# define NSHOULD 15 + char *should[NSHOULD]; + int nshould; + char erbuf[100]; + int err; + int len; + char *type = (opts & REG_EXTENDED) ? 
"ERE" : "BRE"; + register int i; + char *grump; + char f0copy[1000]; + char f2copy[1000]; + + strcpy(f0copy, f0); + re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; + fixstr(f0copy); + err = regcomp(&re, f0copy, opts); + if (err != 0 && (!opt('C', f1) || err != efind(f2))) { + /* unexpected error or wrong error */ + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", + line, type, eprint(err), len, + sizeof(erbuf), erbuf); + status = 1; + } else if (err == 0 && opt('C', f1)) { + /* unexpected success */ + fprintf(stderr, "%d: %s should have given REG_%s\n", + line, type, f2); + status = 1; + err = 1; /* so we won't try regexec */ + } + + if (err != 0) { + regfree(&re); + return; + } + + strcpy(f2copy, f2); + fixstr(f2copy); + + if (options('e', f1)®_STARTEND) { + if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) + fprintf(stderr, "%d: bad STARTEND syntax\n", line); + subs[0].rm_so = strchr(f2, '(') - f2 + 1; + subs[0].rm_eo = strchr(f2, ')') - f2; + } + err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); + + if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { + /* unexpected error or wrong error */ + len = regerror(err, &re, erbuf, sizeof(erbuf)); + fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", + line, type, eprint(err), len, + sizeof(erbuf), erbuf); + status = 1; + } else if (err != 0) { + /* nothing more to check */ + } else if (f3 == NULL) { + /* unexpected success */ + fprintf(stderr, "%d: %s exec should have failed\n", + line, type); + status = 1; + err = 1; /* just on principle */ + } else if (opts®_NOSUB) { + /* nothing more to check */ + } else if ((grump = check(f2, subs[0], f3)) != NULL) { + fprintf(stderr, "%d: %s %s\n", line, type, grump); + status = 1; + err = 1; + } + + if (err != 0 || f4 == NULL) { + regfree(&re); + return; + } + + for (i = 1; i < NSHOULD; i++) + should[i] = NULL; + nshould = split(f4, should+1, NSHOULD-1, ","); + if (nshould == 0) { + nshould = 1; + 
should[1] = ""; + } + for (i = 1; i < NSUBS; i++) { + grump = check(f2, subs[i], should[i]); + if (grump != NULL) { + fprintf(stderr, "%d: %s $%d %s\n", line, + type, i, grump); + status = 1; + err = 1; + } + } + + regfree(&re); +} + +/* + - options - pick options out of a regression-test string + == int options(int type, char *s); + */ +int +options(type, s) +int type; /* 'c' compile, 'e' exec */ +char *s; +{ + register char *p; + register int o = (type == 'c') ? copts : eopts; + register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; + + for (p = s; *p != '\0'; p++) + if (strchr(legal, *p) != NULL) + switch (*p) { + case 'b': + o &= ~REG_EXTENDED; + break; + case 'i': + o |= REG_ICASE; + break; + case 's': + o |= REG_NOSUB; + break; + case 'n': + o |= REG_NEWLINE; + break; + case 'm': + o &= ~REG_EXTENDED; + o |= REG_NOSPEC; + break; + case 'p': + o |= REG_PEND; + break; + case '^': + o |= REG_NOTBOL; + break; + case '$': + o |= REG_NOTEOL; + break; + case '#': + o |= REG_STARTEND; + break; + case 't': /* trace */ + o |= REG_TRACE; + break; + case 'l': /* force long representation */ + o |= REG_LARGE; + break; + case 'r': /* force backref use */ + o |= REG_BACKR; + break; + } + return(o); +} + +/* + - opt - is a particular option in a regression string? 
+ == int opt(int c, char *s); + */ +int /* predicate */ +opt(c, s) +int c; +char *s; +{ + return(strchr(s, c) != NULL); +} + +/* + - fixstr - transform magic characters in strings + == void fixstr(register char *p); + */ +void +fixstr(p) +register char *p; +{ + if (p == NULL) + return; + + for (; *p != '\0'; p++) + if (*p == 'N') + *p = '\n'; + else if (*p == 'T') + *p = '\t'; + else if (*p == 'S') + *p = ' '; + else if (*p == 'Z') + *p = '\0'; +} + +/* + - check - check a substring match + == char *check(char *str, regmatch_t sub, char *should); + */ +char * /* NULL or complaint */ +check(str, sub, should) +char *str; +regmatch_t sub; +char *should; +{ + register int len; + register int shlen; + register char *p; + static char grump[500]; + register char *at = NULL; + + if (should != NULL && strcmp(should, "-") == 0) + should = NULL; + if (should != NULL && should[0] == '@') { + at = should + 1; + should = ""; + } + + /* check rm_so and rm_eo for consistency */ + if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || + (sub.rm_so != -1 && sub.rm_eo == -1) || + (sub.rm_so != -1 && sub.rm_so < 0) || + (sub.rm_eo != -1 && sub.rm_eo < 0) ) { + sprintf(grump, "start %ld end %ld", (long)sub.rm_so, + (long)sub.rm_eo); + return(grump); + } + + /* check for no match */ + if (sub.rm_so == -1 && should == NULL) + return(NULL); + if (sub.rm_so == -1) + return("did not match"); + + /* check for in range */ + if (sub.rm_eo > strlen(str)) { + sprintf(grump, "start %ld end %ld, past end of string", + (long)sub.rm_so, (long)sub.rm_eo); + return(grump); + } + + len = (int)(sub.rm_eo - sub.rm_so); + shlen = (int)strlen(should); + p = str + sub.rm_so; + + /* check for not supposed to match */ + if (should == NULL) { + sprintf(grump, "matched `%.*s'", len, p); + return(grump); + } + + /* check for wrong match */ + if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { + sprintf(grump, "matched `%.*s' instead", len, p); + return(grump); + } + if (shlen > 0) + 
return(NULL); + + /* check null match in right place */ + if (at == NULL) + return(NULL); + shlen = strlen(at); + if (shlen == 0) + shlen = 1; /* force check for end-of-string */ + if (strncmp(p, at, shlen) != 0) { + sprintf(grump, "matched null at `%.20s'", p); + return(grump); + } + return(NULL); +} + +/* + - eprint - convert error number to name + == static char *eprint(int err); + */ +static char * +eprint(err) +int err; +{ + static char epbuf[100]; + size_t len; + + len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); + assert(len <= sizeof(epbuf)); + return(epbuf); +} + +/* + - efind - convert error name to number + == static int efind(char *name); + */ +static int +efind(name) +char *name; +{ + static char efbuf[100]; + size_t n; + regex_t re; + + sprintf(efbuf, "REG_%s", name); + assert(strlen(efbuf) < sizeof(efbuf)); + re.re_endp = efbuf; + (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); + return(atoi(efbuf)); +} diff --git a/src/regex/re_syntax.n b/src/regex/re_syntax.n deleted file mode 100644 index f37bb85abd..0000000000 --- a/src/regex/re_syntax.n +++ /dev/null @@ -1,970 +0,0 @@ -'\" -'\" Copyright (c) 1998 Sun Microsystems, Inc. -'\" Copyright (c) 1999 Scriptics Corporation -'\" -'\" This software is copyrighted by the Regents of the University of -'\" California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState -'\" Corporation and other parties. The following terms apply to all files -'\" associated with the software unless explicitly disclaimed in -'\" individual files. -'\" -'\" The authors hereby grant permission to use, copy, modify, distribute, -'\" and license this software and its documentation for any purpose, provided -'\" that existing copyright notices are retained in all copies and that this -'\" notice is included verbatim in any distributions. No written agreement, -'\" license, or royalty fee is required for any of the authorized uses. 
-'\" Modifications to this software may be copyrighted by their authors -'\" and need not follow the licensing terms described here, provided that -'\" the new terms are clearly indicated on the first page of each file where -'\" they apply. -'\" -'\" IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY -'\" FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES -'\" ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY -'\" DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE -'\" POSSIBILITY OF SUCH DAMAGE. -'\" -'\" THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, -'\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, -'\" FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE -'\" IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE -'\" NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR -'\" MODIFICATIONS. -'\" -'\" GOVERNMENT USE: If you are acquiring this software on behalf of the -'\" U.S. government, the Government shall have only "Restricted Rights" -'\" in the software and related documentation as defined in the Federal -'\" Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you -'\" are acquiring the software on behalf of the Department of Defense, the -'\" software shall be classified as "Commercial Computer Software" and the -'\" Government shall have only "Restricted Rights" as defined in Clause -'\" 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the -'\" authors grant the U.S. Government and others acting in its behalf -'\" permission to use and distribute the software in accordance with the -'\" terms specified in this license. -'\" -'\" RCS: @(#) Id: re_syntax.n,v 1.3 1999/07/14 19:09:36 jpeek Exp -'\" -.so man.macros -.TH re_syntax n "8.1" Tcl "Tcl Built-In Commands" -.BS -.SH NAME -re_syntax \- Syntax of Tcl regular expressions. 
-.BE - -.SH DESCRIPTION -.PP -A \fIregular expression\fR describes strings of characters. -It's a pattern that matches certain strings and doesn't match others. - -.SH "DIFFERENT FLAVORS OF REs" -Regular expressions (``RE''s), as defined by POSIX, come in two -flavors: \fIextended\fR REs (``EREs'') and \fIbasic\fR REs (``BREs''). -EREs are roughly those of the traditional \fIegrep\fR, while BREs are -roughly those of the traditional \fIed\fR. This implementation adds -a third flavor, \fIadvanced\fR REs (``AREs''), basically EREs with -some significant extensions. -.PP -This manual page primarily describes AREs. BREs mostly exist for -backward compatibility in some old programs; they will be discussed at -the end. POSIX EREs are almost an exact subset of AREs. Features of -AREs that are not present in EREs will be indicated. - -.SH "REGULAR EXPRESSION SYNTAX" -.PP -Tcl regular expressions are implemented using the package written by -Henry Spencer, based on the 1003.2 spec and some (not quite all) of -the Perl5 extensions (thanks, Henry!). Much of the description of -regular expressions below is copied verbatim from his manual entry. -.PP -An ARE is one or more \fIbranches\fR, -separated by `\fB|\fR', -matching anything that matches any of the branches. -.PP -A branch is zero or more \fIconstraints\fR or \fIquantified atoms\fR, -concatenated. -It matches a match for the first, followed by a match for the second, etc; -an empty branch matches the empty string. -.PP -A quantified atom is an \fIatom\fR possibly followed -by a single \fIquantifier\fR. -Without a quantifier, it matches a match for the atom. 
-The quantifiers, -and what a so-quantified atom matches, are: -.RS 2 -.TP 6 -\fB*\fR -a sequence of 0 or more matches of the atom -.TP -\fB+\fR -a sequence of 1 or more matches of the atom -.TP -\fB?\fR -a sequence of 0 or 1 matches of the atom -.TP -\fB{\fIm\fB}\fR -a sequence of exactly \fIm\fR matches of the atom -.TP -\fB{\fIm\fB,}\fR -a sequence of \fIm\fR or more matches of the atom -.TP -\fB{\fIm\fB,\fIn\fB}\fR -a sequence of \fIm\fR through \fIn\fR (inclusive) matches of the atom; -\fIm\fR may not exceed \fIn\fR -.TP -\fB*? +? ?? {\fIm\fB}? {\fIm\fB,}? {\fIm\fB,\fIn\fB}?\fR -\fInon-greedy\fR quantifiers, -which match the same possibilities, -but prefer the smallest number rather than the largest number -of matches (see MATCHING) -.RE -.PP -The forms using -\fB{\fR and \fB}\fR -are known as \fIbound\fRs. -The numbers -\fIm\fR and \fIn\fR are unsigned decimal integers -with permissible values from 0 to 255 inclusive. -.PP -An atom is one of: -.RS 2 -.TP 6 -\fB(\fIre\fB)\fR -(where \fIre\fR is any regular expression) -matches a match for -\fIre\fR, with the match noted for possible reporting -.TP -\fB(?:\fIre\fB)\fR -as previous, -but does no reporting -(a ``non-capturing'' set of parentheses) -.TP -\fB()\fR -matches an empty string, -noted for possible reporting -.TP -\fB(?:)\fR -matches an empty string, -without reporting -.TP -\fB[\fIchars\fB]\fR -a \fIbracket expression\fR, -matching any one of the \fIchars\fR (see BRACKET EXPRESSIONS for more detail) -.TP - \fB.\fR -matches any single character -.TP -\fB\e\fIk\fR -(where \fIk\fR is a non-alphanumeric character) -matches that character taken as an ordinary character, -e.g. 
\e\e matches a backslash character -.TP -\fB\e\fIc\fR -where \fIc\fR is alphanumeric -(possibly followed by other characters), -an \fIescape\fR (AREs only), -see ESCAPES below -.TP -\fB{\fR -when followed by a character other than a digit, -matches the left-brace character `\fB{\fR'; -when followed by a digit, it is the beginning of a -\fIbound\fR (see above) -.TP -\fIx\fR -where \fIx\fR is -a single character with no other significance, matches that character. -.RE -.PP -A \fIconstraint\fR matches an empty string when specific conditions -are met. -A constraint may not be followed by a quantifier. -The simple constraints are as follows; some more constraints are -described later, under ESCAPES. -.RS 2 -.TP 8 -\fB^\fR -matches at the beginning of a line -.TP -\fB$\fR -matches at the end of a line -.TP -\fB(?=\fIre\fB)\fR -\fIpositive lookahead\fR (AREs only), matches at any point -where a substring matching \fIre\fR begins -.TP -\fB(?!\fIre\fB)\fR -\fInegative lookahead\fR (AREs only), matches at any point -where no substring matching \fIre\fR begins -.RE -.PP -The lookahead constraints may not contain back references (see later), -and all parentheses within them are considered non-capturing. -.PP -An RE may not end with `\fB\e\fR'. - -.SH "BRACKET EXPRESSIONS" -A \fIbracket expression\fR is a list of characters enclosed in `\fB[\|]\fR'. -It normally matches any single character from the list (but see below). -If the list begins with `\fB^\fR', -it matches any single character -(but see below) \fInot\fR from the rest of the list. -.PP -If two characters in the list are separated by `\fB\-\fR', -this is shorthand -for the full \fIrange\fR of characters between those two (inclusive) in the -collating sequence, -e.g. -\fB[0\-9]\fR -in ASCII matches any decimal digit. -Two ranges may not share an -endpoint, so e.g. -\fBa\-c\-e\fR -is illegal. -Ranges are very collating-sequence-dependent, -and portable programs should avoid relying on them. 
-.PP -To include a literal -\fB]\fR -or -\fB\-\fR -in the list, -the simplest method is to -enclose it in -\fB[.\fR and \fB.]\fR -to make it a collating element (see below). -Alternatively, -make it the first character -(following a possible `\fB^\fR'), -or (AREs only) precede it with `\fB\e\fR'. -Alternatively, for `\fB\-\fR', -make it the last character, -or the second endpoint of a range. -To use a literal -\fB\-\fR -as the first endpoint of a range, -make it a collating element -or (AREs only) precede it with `\fB\e\fR'. -With the exception of these, some combinations using -\fB[\fR -(see next -paragraphs), and escapes, -all other special characters lose their -special significance within a bracket expression. -.PP -Within a bracket expression, a collating element (a character, -a multi-character sequence that collates as if it were a single character, -or a collating-sequence name for either) -enclosed in -\fB[.\fR and \fB.]\fR -stands for the -sequence of characters of that collating element. -The sequence is a single element of the bracket expression's list. -A bracket expression in a locale that has -multi-character collating elements -can thus match more than one character. -.VS 8.2 -So (insidiously), a bracket expression that starts with \fB^\fR -can match multi-character collating elements even if none of them -appear in the bracket expression! -(\fINote:\fR Tcl currently has no multi-character collating elements. -This information is only for illustration.) -.PP -For example, assume the collating sequence includes a \fBch\fR -multi-character collating element. -Then the RE \fB[[.ch.]]*c\fR (zero or more \fBch\fP's followed by \fBc\fP) -matches the first five characters of `\fBchchcc\fR'. -Also, the RE \fB[^c]b\fR matches all of `\fBchb\fR' -(because \fB[^c]\fR matches the multi-character \fBch\fR). 
-.VE 8.2 -.PP -Within a bracket expression, a collating element enclosed in -\fB[=\fR -and -\fB=]\fR -is an equivalence class, standing for the sequences of characters -of all collating elements equivalent to that one, including itself. -(If there are no other equivalent collating elements, -the treatment is as if the enclosing delimiters were `\fB[.\fR'\& -and `\fB.]\fR'.) -For example, if -\fBo\fR -and -\fB\o'o^'\fR -are the members of an equivalence class, -then `\fB[[=o=]]\fR', `\fB[[=\o'o^'=]]\fR', -and `\fB[o\o'o^']\fR'\& -are all synonymous. -An equivalence class may not be an endpoint -of a range. -.VS 8.2 -(\fINote:\fR -Tcl currently implements only the Unicode locale. -It doesn't define any equivalence classes. -The examples above are just illustrations.) -.VE 8.2 -.PP -Within a bracket expression, the name of a \fIcharacter class\fR enclosed -in -\fB[:\fR -and -\fB:]\fR -stands for the list of all characters -(not all collating elements!) -belonging to that -class. -Standard character classes are: -.PP -.RS -.ne 5 -.nf -.ta 3c -\fBalpha\fR A letter. -\fBupper\fR An upper-case letter. -\fBlower\fR A lower-case letter. -\fBdigit\fR A decimal digit. -\fBxdigit\fR A hexadecimal digit. -\fBalnum\fR An alphanumeric (letter or digit). -\fBprint\fR An alphanumeric (same as alnum). -\fBblank\fR A space or tab character. -\fBspace\fR A character producing white space in displayed text. -\fBpunct\fR A punctuation character. -\fBgraph\fR A character with a visible representation. -\fBcntrl\fR A control character. -.fi -.RE -.PP -A locale may provide others. -.VS 8.2 -(Note that the current Tcl implementation has only one locale: -the Unicode locale.) -.VE 8.2 -A character class may not be used as an endpoint of a range. -.PP -There are two special cases of bracket expressions: -the bracket expressions -\fB[[:<:]]\fR -and -\fB[[:>:]]\fR -are constraints, matching empty strings at -the beginning and end of a word respectively. 
-'\" note, discussion of escapes below references this definition of word -A word is defined as a sequence of -word characters -that is neither preceded nor followed by -word characters. -A word character is an -\fIalnum\fR -character -or an underscore -(\fB_\fR). -These special bracket expressions are deprecated; -users of AREs should use constraint escapes instead (see below). -.SH ESCAPES -Escapes (AREs only), which begin with a -\fB\e\fR -followed by an alphanumeric character, -come in several varieties: -character entry, class shorthands, constraint escapes, and back references. -A -\fB\e\fR -followed by an alphanumeric character but not constituting -a valid escape is illegal in AREs. -In EREs, there are no escapes: -outside a bracket expression, -a -\fB\e\fR -followed by an alphanumeric character merely stands for that -character as an ordinary character, -and inside a bracket expression, -\fB\e\fR -is an ordinary character. -(The latter is the one actual incompatibility between EREs and AREs.) 
-.PP -Character-entry escapes (AREs only) exist to make it easier to specify -non-printing and otherwise inconvenient characters in REs: -.RS 2 -.TP 5 -\fB\ea\fR -alert (bell) character, as in C -.TP -\fB\eb\fR -backspace, as in C -.TP -\fB\eB\fR -synonym for -\fB\e\fR -to help reduce backslash doubling in some -applications where there are multiple levels of backslash processing -.TP -\fB\ec\fIX\fR -(where X is any character) the character whose -low-order 5 bits are the same as those of -\fIX\fR, -and whose other bits are all zero -.TP -\fB\ee\fR -the character whose collating-sequence name -is `\fBESC\fR', -or failing that, the character with octal value 033 -.TP -\fB\ef\fR -formfeed, as in C -.TP -\fB\en\fR -newline, as in C -.TP -\fB\er\fR -carriage return, as in C -.TP -\fB\et\fR -horizontal tab, as in C -.TP -\fB\eu\fIwxyz\fR -(where -\fIwxyz\fR -is exactly four hexadecimal digits) -the Unicode character -\fBU+\fIwxyz\fR -in the local byte ordering -.TP -\fB\eU\fIstuvwxyz\fR -(where -\fIstuvwxyz\fR -is exactly eight hexadecimal digits) -reserved for a somewhat-hypothetical Unicode extension to 32 bits -.TP -\fB\ev\fR -vertical tab, as in C -are all available. -.TP -\fB\ex\fIhhh\fR -(where -\fIhhh\fR -is any sequence of hexadecimal digits) -the character whose hexadecimal value is -\fB0x\fIhhh\fR -(a single character no matter how many hexadecimal digits are used). -.TP -\fB\e0\fR -the character whose value is -\fB0\fR -.TP -\fB\e\fIxy\fR -(where -\fIxy\fR -is exactly two octal digits, -and is not a -\fIback reference\fR (see below)) -the character whose octal value is -\fB0\fIxy\fR -.TP -\fB\e\fIxyz\fR -(where -\fIxyz\fR -is exactly three octal digits, -and is not a -back reference (see below)) -the character whose octal value is -\fB0\fIxyz\fR -.RE -.PP -Hexadecimal digits are `\fB0\fR'-`\fB9\fR', `\fBa\fR'-`\fBf\fR', -and `\fBA\fR'-`\fBF\fR'. -Octal digits are `\fB0\fR'-`\fB7\fR'. -.PP -The character-entry escapes are always taken as ordinary characters. 
-For example, -\fB\e135\fR -is -\fB]\fR -in ASCII, -but -\fB\e135\fR -does not terminate a bracket expression. -Beware, however, that some applications (e.g., C compilers) interpret -such sequences themselves before the regular-expression package -gets to see them, which may require doubling (quadrupling, etc.) the `\fB\e\fR'. -.PP -Class-shorthand escapes (AREs only) provide shorthands for certain commonly-used -character classes: -.RS 2 -.TP 10 -\fB\ed\fR -\fB[[:digit:]]\fR -.TP -\fB\es\fR -\fB[[:space:]]\fR -.TP -\fB\ew\fR -\fB[[:alnum:]_]\fR -(note underscore) -.TP -\fB\eD\fR -\fB[^[:digit:]]\fR -.TP -\fB\eS\fR -\fB[^[:space:]]\fR -.TP -\fB\eW\fR -\fB[^[:alnum:]_]\fR -(note underscore) -.RE -.PP -Within bracket expressions, `\fB\ed\fR', `\fB\es\fR', -and `\fB\ew\fR'\& -lose their outer brackets, -and `\fB\eD\fR', `\fB\eS\fR', -and `\fB\eW\fR'\& -are illegal. -.VS 8.2 -(So, for example, \fB[a-c\ed]\fR is equivalent to \fB[a-c[:digit:]]\fR. -Also, \fB[a-c\eD]\fR, which is equivalent to \fB[a-c^[:digit:]]\fR, is illegal.) 
-.VE 8.2 -.PP -A constraint escape (AREs only) is a constraint, -matching the empty string if specific conditions are met, -written as an escape: -.RS 2 -.TP 6 -\fB\eA\fR -matches only at the beginning of the string -(see MATCHING, below, for how this differs from `\fB^\fR') -.TP -\fB\em\fR -matches only at the beginning of a word -.TP -\fB\eM\fR -matches only at the end of a word -.TP -\fB\ey\fR -matches only at the beginning or end of a word -.TP -\fB\eY\fR -matches only at a point that is not the beginning or end of a word -.TP -\fB\eZ\fR -matches only at the end of the string -(see MATCHING, below, for how this differs from `\fB$\fR') -.TP -\fB\e\fIm\fR -(where -\fIm\fR -is a nonzero digit) a \fIback reference\fR, see below -.TP -\fB\e\fImnn\fR -(where -\fIm\fR -is a nonzero digit, and -\fInn\fR -is some more digits, -and the decimal value -\fImnn\fR -is not greater than the number of closing capturing parentheses seen so far) -a \fIback reference\fR, see below -.RE -.PP -A word is defined as in the specification of -\fB[[:<:]]\fR -and -\fB[[:>:]]\fR -above. -Constraint escapes are illegal within bracket expressions. -.PP -A back reference (AREs only) matches the same string matched by the parenthesized -subexpression specified by the number, -so that (e.g.) -\fB([bc])\e1\fR -matches -\fBbb\fR -or -\fBcc\fR -but not `\fBbc\fR'. -The subexpression must entirely precede the back reference in the RE. -Subexpressions are numbered in the order of their leading parentheses. -Non-capturing parentheses do not define subexpressions. -.PP -There is an inherent historical ambiguity between octal character-entry -escapes and back references, which is resolved by heuristics, -as hinted at above. -A leading zero always indicates an octal escape. -A single non-zero digit, not followed by another digit, -is always taken as a back reference. -A multi-digit sequence not starting with a zero is taken as a back -reference if it comes after a suitable subexpression -(i.e. 
the number is in the legal range for a back reference), -and otherwise is taken as octal. -.SH "METASYNTAX" -In addition to the main syntax described above, there are some special -forms and miscellaneous syntactic facilities available. -.PP -Normally the flavor of RE being used is specified by -application-dependent means. -However, this can be overridden by a \fIdirector\fR. -If an RE of any flavor begins with `\fB***:\fR', -the rest of the RE is an ARE. -If an RE of any flavor begins with `\fB***=\fR', -the rest of the RE is taken to be a literal string, -with all characters considered ordinary characters. -.PP -An ARE may begin with \fIembedded options\fR: -a sequence -\fB(?\fIxyz\fB)\fR -(where -\fIxyz\fR -is one or more alphabetic characters) -specifies options affecting the rest of the RE. -These supplement, and can override, -any options specified by the application. -The available option letters are: -.RS 2 -.TP 3 -\fBb\fR -rest of RE is a BRE -.TP 3 -\fBc\fR -case-sensitive matching (usual default) -.TP 3 -\fBe\fR -rest of RE is an ERE -.TP 3 -\fBi\fR -case-insensitive matching (see MATCHING, below) -.TP 3 -\fBm\fR -historical synonym for -\fBn\fR -.TP 3 -\fBn\fR -newline-sensitive matching (see MATCHING, below) -.TP 3 -\fBp\fR -partial newline-sensitive matching (see MATCHING, below) -.TP 3 -\fBq\fR -rest of RE is a literal (``quoted'') string, all ordinary characters -.TP 3 -\fBs\fR -non-newline-sensitive matching (usual default) -.TP 3 -\fBt\fR -tight syntax (usual default; see below) -.TP 3 -\fBw\fR -inverse partial newline-sensitive (``weird'') matching (see MATCHING, below) -.TP 3 -\fBx\fR -expanded syntax (see below) -.RE -.PP -Embedded options take effect at the -\fB)\fR -terminating the sequence. -They are available only at the start of an ARE, -and may not be used later within it. 
-.PP -In addition to the usual (\fItight\fR) RE syntax, in which all characters are -significant, there is an \fIexpanded\fR syntax, -available in all flavors of RE -with the \fB-expanded\fR switch, or in AREs with the embedded x option. -In the expanded syntax, -white-space characters are ignored -and all characters between a -\fB#\fR -and the following newline (or the end of the RE) are ignored, -permitting paragraphing and commenting a complex RE. -There are three exceptions to that basic rule: -.RS 2 -.PP -a white-space character or `\fB#\fR' preceded by `\fB\e\fR' is retained -.PP -white space or `\fB#\fR' within a bracket expression is retained -.PP -white space and comments are illegal within multi-character symbols -like the ARE `\fB(?:\fR' or the BRE `\fB\e(\fR' -.RE -.PP -Expanded-syntax white-space characters are blank, tab, newline, and -.VS 8.2 -any character that belongs to the \fIspace\fR character class. -.VE 8.2 -.PP -Finally, in an ARE, -outside bracket expressions, the sequence `\fB(?#\fIttt\fB)\fR' -(where -\fIttt\fR -is any text not containing a `\fB)\fR') -is a comment, -completely ignored. -Again, this is not allowed between the characters of -multi-character symbols like `\fB(?:\fR'. -Such comments are more a historical artifact than a useful facility, -and their use is deprecated; -use the expanded syntax instead. -.PP -\fINone\fR of these metasyntax extensions is available if the application -(or an initial -\fB***=\fR -director) -has specified that the user's input be treated as a literal string -rather than as an RE. -.SH MATCHING -In the event that an RE could match more than one substring of a given -string, -the RE matches the one starting earliest in the string. -If the RE could match more than one substring starting at that point, -its choice is determined by its \fIpreference\fR: -either the longest substring, or the shortest. -.PP -Most atoms, and all constraints, have no preference. 
-A parenthesized RE has the same preference (possibly none) as the RE. -A quantified atom with quantifier -\fB{\fIm\fB}\fR -or -\fB{\fIm\fB}?\fR -has the same preference (possibly none) as the atom itself. -A quantified atom with other normal quantifiers (including -\fB{\fIm\fB,\fIn\fB}\fR -with -\fIm\fR -equal to -\fIn\fR) -prefers longest match. -A quantified atom with other non-greedy quantifiers (including -\fB{\fIm\fB,\fIn\fB}?\fR -with -\fIm\fR -equal to -\fIn\fR) -prefers shortest match. -A branch has the same preference as the first quantified atom in it -which has a preference. -An RE consisting of two or more branches connected by the -\fB|\fR -operator prefers longest match. -.PP -Subject to the constraints imposed by the rules for matching the whole RE, -subexpressions also match the longest or shortest possible substrings, -based on their preferences, -with subexpressions starting earlier in the RE taking priority over -ones starting later. -Note that outer subexpressions thus take priority over -their component subexpressions. -.PP -Note that the quantifiers -\fB{1,1}\fR -and -\fB{1,1}?\fR -can be used to force longest and shortest preference, respectively, -on a subexpression or a whole RE. -.PP -Match lengths are measured in characters, not collating elements. -An empty string is considered longer than no match at all. -For example, -\fBbb*\fR -matches the three middle characters of `\fBabbbc\fR', -\fB(week|wee)(night|knights)\fR -matches all ten characters of `\fBweeknights\fR', -when -\fB(.*).*\fR -is matched against -\fBabc\fR -the parenthesized subexpression -matches all three characters, and -when -\fB(a*)*\fR -is matched against -\fBbc\fR -both the whole RE and the parenthesized -subexpression match an empty string. -.PP -If case-independent matching is specified, -the effect is much as if all case distinctions had vanished from the -alphabet. 
-When an alphabetic that exists in multiple cases appears as an -ordinary character outside a bracket expression, it is effectively -transformed into a bracket expression containing both cases, -so that -\fBx\fR -becomes `\fB[xX]\fR'. -When it appears inside a bracket expression, all case counterparts -of it are added to the bracket expression, so that -\fB[x]\fR -becomes -\fB[xX]\fR -and -\fB[^x]\fR -becomes `\fB[^xX]\fR'. -.PP -If newline-sensitive matching is specified, \fB.\fR -and bracket expressions using -\fB^\fR -will never match the newline character -(so that matches will never cross newlines unless the RE -explicitly arranges it) -and -\fB^\fR -and -\fB$\fR -will match the empty string after and before a newline -respectively, in addition to matching at beginning and end of string -respectively. -ARE -\fB\eA\fR -and -\fB\eZ\fR -continue to match beginning or end of string \fIonly\fR. -.PP -If partial newline-sensitive matching is specified, -this affects \fB.\fR -and bracket expressions -as with newline-sensitive matching, but not -\fB^\fR -and `\fB$\fR'. -.PP -If inverse partial newline-sensitive matching is specified, -this affects -\fB^\fR -and -\fB$\fR -as with -newline-sensitive matching, -but not \fB.\fR -and bracket expressions. -This isn't very useful but is provided for symmetry. -.SH "LIMITS AND COMPATIBILITY" -No particular limit is imposed on the length of REs. -Programs intended to be highly portable should not employ REs longer -than 256 bytes, -as a POSIX-compliant implementation can refuse to accept such REs. -.PP -The only feature of AREs that is actually incompatible with -POSIX EREs is that -\fB\e\fR -does not lose its special -significance inside bracket expressions. -All other ARE features use syntax which is illegal or has -undefined or unspecified effects in POSIX EREs; -the -\fB***\fR -syntax of directors likewise is outside the POSIX -syntax for both BREs and EREs. 
-.PP -Many of the ARE extensions are borrowed from Perl, but some have -been changed to clean them up, and a few Perl extensions are not present. -Incompatibilities of note include `\fB\eb\fR', `\fB\eB\fR', -the lack of special treatment for a trailing newline, -the addition of complemented bracket expressions to the things -affected by newline-sensitive matching, -the restrictions on parentheses and back references in lookahead constraints, -and the longest/shortest-match (rather than first-match) matching semantics. -.PP -The matching rules for REs containing both normal and non-greedy quantifiers -have changed since early beta-test versions of this package. -(The new rules are much simpler and cleaner, -but don't work as hard at guessing the user's real intentions.) -.PP -Henry Spencer's original 1986 \fIregexp\fR package, -still in widespread use (e.g., in pre-8.1 releases of Tcl), -implemented an early version of today's EREs. -There are four incompatibilities between \fIregexp\fR's near-EREs -(`RREs' for short) and AREs. -In roughly increasing order of significance: -.PP -.RS -In AREs, -\fB\e\fR -followed by an alphanumeric character is either an -escape or an error, -while in RREs, it was just another way of writing the -alphanumeric. -This should not be a problem because there was no reason to write -such a sequence in RREs. -.PP -\fB{\fR -followed by a digit in an ARE is the beginning of a bound, -while in RREs, -\fB{\fR -was always an ordinary character. -Such sequences should be rare, -and will often result in an error because following characters -will not look like a valid bound. -.PP -In AREs, -\fB\e\fR -remains a special character within `\fB[\|]\fR', -so a literal -\fB\e\fR -within -\fB[\|]\fR -must be written `\fB\e\e\fR'. -\fB\e\e\fR -also gives a literal -\fB\e\fR -within -\fB[\|]\fR -in RREs, -but only truly paranoid programmers routinely doubled the backslash. 
-.PP -AREs report the longest/shortest match for the RE, -rather than the first found in a specified search order. -This may affect some RREs which were written in the expectation that -the first match would be reported. -(The careful crafting of RREs to optimize the search order for fast -matching is obsolete (AREs examine all possible matches -in parallel, and their performance is largely insensitive to their -complexity) but cases where the search order was exploited to deliberately -find a match which was \fInot\fR the longest/shortest will need rewriting.) -.RE - -.SH "BASIC REGULAR EXPRESSIONS" -BREs differ from EREs in several respects. `\fB|\fR', `\fB+\fR', -and -\fB?\fR -are ordinary characters and there is no equivalent -for their functionality. -The delimiters for bounds are -\fB\e{\fR -and `\fB\e}\fR', -with -\fB{\fR -and -\fB}\fR -by themselves ordinary characters. -The parentheses for nested subexpressions are -\fB\e(\fR -and `\fB\e)\fR', -with -\fB(\fR -and -\fB)\fR -by themselves ordinary characters. -\fB^\fR -is an ordinary character except at the beginning of the -RE or the beginning of a parenthesized subexpression, -\fB$\fR -is an ordinary character except at the end of the -RE or the end of a parenthesized subexpression, -and -\fB*\fR -is an ordinary character if it appears at the beginning of the -RE or the beginning of a parenthesized subexpression -(after a possible leading `\fB^\fR'). -Finally, -single-digit back references are available, -and -\fB\e<\fR -and -\fB\e>\fR -are synonyms for -\fB[[:<:]]\fR -and -\fB[[:>:]]\fR -respectively; -no other escapes are available. 
- -.SH "SEE ALSO" -RegExp(3), regexp(n), regsub(n), lsearch(n), switch(n), text(n) - -.SH KEYWORDS -match, regular expression, string diff --git a/src/regex/regc_color.c b/src/regex/regc_color.c deleted file mode 100644 index 5376af2ed5..0000000000 --- a/src/regex/regc_color.c +++ /dev/null @@ -1,780 +0,0 @@ -/* - * colorings of characters - * This file is #included by regcomp.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $Header$ - * - * - * Note that there are some incestuous relationships between this code and - * NFA arc maintenance, which perhaps ought to be cleaned up sometime. - */ - - - -#define CISERR() VISERR(cm->v) -#define CERR(e) VERR(cm->v, (e)) - - - -/* - * initcm - set up new colormap - */ -static void -initcm(struct vars * v, - struct colormap * cm) -{ - int i; - int j; - union tree *t; - union tree *nextt; - struct colordesc *cd; - - cm->magic = CMMAGIC; - cm->v = v; - - cm->ncds = NINLINECDS; - cm->cd = cm->cdspace; - cm->max = 0; - cm->free = 0; - - cd = cm->cd; /* cm->cd[WHITE] */ - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - cd->nchrs = CHR_MAX - CHR_MIN + 1; - - /* upper levels of tree */ - for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--) - { - nextt = t + 1; - for (i = BYTTAB - 1; i >= 0; i--) - t->tptr[i] = nextt; - } - /* bottom level is solid white */ - t = &cm->tree[NBYTS - 1]; - for (i = BYTTAB - 1; i >= 0; i--) - t->tcolor[i] = WHITE; - cd->block = t; -} - -/* - * freecm - free dynamically-allocated things in a colormap - */ -static void -freecm(struct colormap * cm) -{ - size_t i; - union tree *cb; - - cm->magic = 0; - if (NBYTS > 1) - cmtreefree(cm, cm->tree, 0); - for (i = 1; i <= cm->max; i++) /* skip WHITE */ - if (!UNUSEDCOLOR(&cm->cd[i])) - { - cb = cm->cd[i].block; - if (cb != NULL) - FREE(cb); - } - if (cm->cd != cm->cdspace) - FREE(cm->cd); -} - -/* - * cmtreefree - free a non-terminal part of a colormap tree - */ -static void -cmtreefree(struct colormap * cm, - union tree * tree, - int level) /* level number (top == 0) of this block */ -{ - int i; - union tree *t; - union tree *fillt = &cm->tree[level + 1]; - union tree *cb; - - assert(level < NBYTS - 1); /* this level has pointers */ - for (i = BYTTAB - 1; i >= 0; i--) - { - t = tree->tptr[i]; - assert(t != NULL); - if (t != fillt) - { - if (level < NBYTS - 2) - { /* more pointer blocks below */ - cmtreefree(cm, t, level + 1); - FREE(t); - } - else - { /* 
color block below */ - cb = cm->cd[t->tcolor[0]].block; - if (t != cb) /* not a solid block */ - FREE(t); - } - } - } -} - -/* - * setcolor - set the color of a character in a colormap - */ -static color /* previous color */ -setcolor(struct colormap * cm, - chr c, - pcolor co) -{ - uchr uc = c; - int shift; - int level; - int b; - int bottom; - union tree *t; - union tree *newt; - union tree *fillt; - union tree *lastt; - union tree *cb; - color prev; - - assert(cm->magic == CMMAGIC); - if (CISERR() || co == COLORLESS) - return COLORLESS; - - t = cm->tree; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) - { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level + 1]; - bottom = (shift <= BYTBITS) ? 1 : 0; - cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt; - if (t == fillt || t == cb) - { /* must allocate a new block */ - newt = (union tree *) MALLOC((bottom) ? - sizeof(struct colors) : sizeof(struct ptrs)); - if (newt == NULL) - { - CERR(REG_ESPACE); - return COLORLESS; - } - if (bottom) - memcpy(VS(newt->tcolor), VS(t->tcolor), - BYTTAB * sizeof(color)); - else - memcpy(VS(newt->tptr), VS(t->tptr), - BYTTAB * sizeof(union tree *)); - t = newt; - lastt->tptr[b] = t; - } - } - - b = uc & BYTMASK; - prev = t->tcolor[b]; - t->tcolor[b] = (color) co; - return prev; -} - -/* - * maxcolor - report largest color number in use - */ -static color -maxcolor(struct colormap * cm) -{ - if (CISERR()) - return COLORLESS; - - return (color) cm->max; -} - -/* - * newcolor - find a new color (must be subject of setcolor at once) - * Beware: may relocate the colordescs. 
- */ -static color /* COLORLESS for error */ -newcolor(struct colormap * cm) -{ - struct colordesc *cd; - struct colordesc *new; - size_t n; - - if (CISERR()) - return COLORLESS; - - if (cm->free != 0) - { - assert(cm->free > 0); - assert((size_t) cm->free < cm->ncds); - cd = &cm->cd[cm->free]; - assert(UNUSEDCOLOR(cd)); - assert(cd->arcs == NULL); - cm->free = cd->sub; - } - else if (cm->max < cm->ncds - 1) - { - cm->max++; - cd = &cm->cd[cm->max]; - } - else - { - /* oops, must allocate more */ - n = cm->ncds * 2; - if (cm->cd == cm->cdspace) - { - new = (struct colordesc *) MALLOC(n * - sizeof(struct colordesc)); - if (new != NULL) - memcpy(VS(new), VS(cm->cdspace), cm->ncds * - sizeof(struct colordesc)); - } - else - new = (struct colordesc *) REALLOC(cm->cd, - n * sizeof(struct colordesc)); - if (new == NULL) - { - CERR(REG_ESPACE); - return COLORLESS; - } - cm->cd = new; - cm->ncds = n; - assert(cm->max < cm->ncds - 1); - cm->max++; - cd = &cm->cd[cm->max]; - } - - cd->nchrs = 0; - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - cd->block = NULL; - - return (color) (cd - cm->cd); -} - -/* - * freecolor - free a color (must have no arcs or subcolor) - */ -static void -freecolor(struct colormap * cm, - pcolor co) -{ - struct colordesc *cd = &cm->cd[co]; - color pco, - nco; /* for freelist scan */ - - assert(co >= 0); - if (co == WHITE) - return; - - assert(cd->arcs == NULL); - assert(cd->sub == NOSUB); - assert(cd->nchrs == 0); - cd->flags = FREECOL; - if (cd->block != NULL) - { - FREE(cd->block); - cd->block = NULL; /* just paranoia */ - } - - if ((size_t) co == cm->max) - { - while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) - cm->max--; - assert(cm->free >= 0); - while ((size_t) cm->free > cm->max) - cm->free = cm->cd[cm->free].sub; - if (cm->free > 0) - { - assert(cm->free < cm->max); - pco = cm->free; - nco = cm->cd[pco].sub; - while (nco > 0) - if ((size_t) nco > cm->max) - { - /* take this one out of freelist */ - nco = cm->cd[nco].sub; - 
cm->cd[pco].sub = nco; - } - else - { - assert(nco < cm->max); - pco = nco; - nco = cm->cd[pco].sub; - } - } - } - else - { - cd->sub = cm->free; - cm->free = (color) (cd - cm->cd); - } -} - -/* - * pseudocolor - allocate a false color, to be managed by other means - */ -static color -pseudocolor(struct colormap * cm) -{ - color co; - - co = newcolor(cm); - if (CISERR()) - return COLORLESS; - cm->cd[co].nchrs = 1; - cm->cd[co].flags = PSEUDO; - return co; -} - -/* - * subcolor - allocate a new subcolor (if necessary) to this chr - */ -static color -subcolor(struct colormap * cm, chr c) -{ - color co; /* current color of c */ - color sco; /* new subcolor */ - - co = GETCOLOR(cm, c); - sco = newsub(cm, co); - if (CISERR()) - return COLORLESS; - assert(sco != COLORLESS); - - if (co == sco) /* already in an open subcolor */ - return co; /* rest is redundant */ - cm->cd[co].nchrs--; - cm->cd[sco].nchrs++; - setcolor(cm, c, sco); - return sco; -} - -/* - * newsub - allocate a new subcolor (if necessary) for a color - */ -static color -newsub(struct colormap * cm, - pcolor co) -{ - color sco; /* new subcolor */ - - sco = cm->cd[co].sub; - if (sco == NOSUB) - { /* color has no open subcolor */ - if (cm->cd[co].nchrs == 1) /* optimization */ - return co; - sco = newcolor(cm); /* must create subcolor */ - if (sco == COLORLESS) - { - assert(CISERR()); - return COLORLESS; - } - cm->cd[co].sub = sco; - cm->cd[sco].sub = sco; /* open subcolor points to self */ - } - assert(sco != NOSUB); - - return sco; -} - -/* - * subrange - allocate new subcolors to this range of chrs, fill in arcs - */ -static void -subrange(struct vars * v, - chr from, - chr to, - struct state * lp, - struct state * rp) -{ - uchr uf; - int i; - - assert(from <= to); - - /* first, align "from" on a tree-block boundary */ - uf = (uchr) from; - i = (int) (((uf + BYTTAB - 1) & (uchr) ~BYTMASK) - uf); - for (; from <= to && i > 0; i--, from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); - if (from > 
to) /* didn't reach a boundary */ - return; - - /* deal with whole blocks */ - for (; to - from >= BYTTAB; from += BYTTAB) - subblock(v, from, lp, rp); - - /* clean up any remaining partial table */ - for (; from <= to; from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); -} - -/* - * subblock - allocate new subcolors for one tree block of chrs, fill in arcs - */ -static void -subblock(struct vars * v, - chr start, /* first of BYTTAB chrs */ - struct state * lp, - struct state * rp) -{ - uchr uc = start; - struct colormap *cm = v->cm; - int shift; - int level; - int i; - int b; - union tree *t; - union tree *cb; - union tree *fillt; - union tree *lastt; - int previ; - int ndone; - color co; - color sco; - - assert((uc % BYTTAB) == 0); - - /* find its color block, making new pointer blocks as needed */ - t = cm->tree; - fillt = NULL; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) - { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level + 1]; - if (t == fillt && shift > BYTBITS) - { /* need new ptr block */ - t = (union tree *) MALLOC(sizeof(struct ptrs)); - if (t == NULL) - { - CERR(REG_ESPACE); - return; - } - memcpy(VS(t->tptr), VS(fillt->tptr), - BYTTAB * sizeof(union tree *)); - lastt->tptr[b] = t; - } - } - - /* special cases: fill block or solid block */ - co = t->tcolor[0]; - cb = cm->cd[co].block; - if (t == fillt || t == cb) - { - /* either way, we want a subcolor solid block */ - sco = newsub(cm, co); - t = cm->cd[sco].block; - if (t == NULL) - { /* must set it up */ - t = (union tree *) MALLOC(sizeof(struct colors)); - if (t == NULL) - { - CERR(REG_ESPACE); - return; - } - for (i = 0; i < BYTTAB; i++) - t->tcolor[i] = sco; - cm->cd[sco].block = t; - } - /* find loop must have run at least once */ - lastt->tptr[b] = t; - newarc(v->nfa, PLAIN, sco, lp, rp); - cm->cd[co].nchrs -= BYTTAB; - cm->cd[sco].nchrs += BYTTAB; - return; - } - - /* general 
case, a mixed block to be altered */ - i = 0; - while (i < BYTTAB) - { - co = t->tcolor[i]; - sco = newsub(cm, co); - newarc(v->nfa, PLAIN, sco, lp, rp); - previ = i; - do - { - t->tcolor[i++] = sco; - } while (i < BYTTAB && t->tcolor[i] == co); - ndone = i - previ; - cm->cd[co].nchrs -= ndone; - cm->cd[sco].nchrs += ndone; - } -} - -/* - * okcolors - promote subcolors to full colors - */ -static void -okcolors(struct nfa * nfa, - struct colormap * cm) -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - struct colordesc *scd; - struct arc *a; - color co; - color sco; - - for (cd = cm->cd, co = 0; cd < end; cd++, co++) - { - sco = cd->sub; - if (UNUSEDCOLOR(cd) || sco == NOSUB) - { - /* has no subcolor, no further action */ - } - else if (sco == co) - { - /* is subcolor, let parent deal with it */ - } - else if (cd->nchrs == 0) - { - /* parent empty, its arcs change color to subcolor */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - while ((a = cd->arcs) != NULL) - { - assert(a->co == co); - /* uncolorchain(cm, a); */ - cd->arcs = a->colorchain; - a->co = sco; - /* colorchain(cm, a); */ - a->colorchain = scd->arcs; - scd->arcs = a; - } - freecolor(cm, co); - } - else - { - /* parent's arcs must gain parallel subcolor arcs */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - for (a = cd->arcs; a != NULL; a = a->colorchain) - { - assert(a->co == co); - newarc(nfa, a->type, sco, a->from, a->to); - } - } - } -} - -/* - * colorchain - add this arc to the color chain of its color - */ -static void -colorchain(struct colormap * cm, - struct arc * a) -{ - struct colordesc *cd = &cm->cd[a->co]; - - a->colorchain = cd->arcs; - cd->arcs = a; -} - -/* - * uncolorchain - delete this arc from the color chain of its color - */ -static void -uncolorchain(struct colormap * cm, - struct arc * a) -{ - struct colordesc *cd = 
&cm->cd[a->co]; - struct arc *aa; - - aa = cd->arcs; - if (aa == a) /* easy case */ - cd->arcs = a->colorchain; - else - { - for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) - continue; - assert(aa != NULL); - aa->colorchain = a->colorchain; - } - a->colorchain = NULL; /* paranoia */ -} - -/* - * singleton - is this character in its own color? - */ -static int /* predicate */ -singleton(struct colormap * cm, - chr c) -{ - color co; /* color of c */ - - co = GETCOLOR(cm, c); - if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) - return 1; - return 0; -} - -/* - * rainbow - add arcs of all full colors (but one) between specified states - */ -static void -rainbow(struct nfa * nfa, - struct colormap * cm, - int type, - pcolor but, /* COLORLESS if no exceptions */ - struct state * from, - struct state * to) -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but && - !(cd->flags & PSEUDO)) - newarc(nfa, type, co, from, to); -} - -/* - * colorcomplement - add arcs of complementary colors - * - * The calling sequence ought to be reconciled with cloneouts(). 
- */ -static void -colorcomplement(struct nfa * nfa, - struct colormap * cm, - int type, - struct state * of, /* complements of this guy's PLAIN - * outarcs */ - struct state * from, - struct state * to) -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - assert(of != from); - for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO)) - if (findarc(of, PLAIN, co) == NULL) - newarc(nfa, type, co, from, to); -} - - -#ifdef REG_DEBUG - -/* - * dumpcolors - debugging output - */ -static void -dumpcolors(struct colormap * cm, - FILE *f) -{ - struct colordesc *cd; - struct colordesc *end; - color co; - chr c; - char *has; - - fprintf(f, "max %ld\n", (long) cm->max); - if (NBYTS > 1) - fillcheck(cm, cm->tree, 0, f); - end = CDEND(cm); - for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ - if (!UNUSEDCOLOR(cd)) - { - assert(cd->nchrs > 0); - has = (cd->block != NULL) ? "#" : ""; - if (cd->flags & PSEUDO) - fprintf(f, "#%2ld%s(ps): ", (long) co, has); - else - fprintf(f, "#%2ld%s(%2d): ", (long) co, - has, cd->nchrs); - /* it's hard to do this more efficiently */ - for (c = CHR_MIN; c < CHR_MAX; c++) - if (GETCOLOR(cm, c) == co) - dumpchr(c, f); - assert(c == CHR_MAX); - if (GETCOLOR(cm, c) == co) - dumpchr(c, f); - fprintf(f, "\n"); - } -} - -/* - * fillcheck - check proper filling of a tree - */ -static void -fillcheck(struct colormap * cm, - union tree * tree, - int level, /* level number (top == 0) of this block */ - FILE *f) -{ - int i; - union tree *t; - union tree *fillt = &cm->tree[level + 1]; - - assert(level < NBYTS - 1); /* this level has pointers */ - for (i = BYTTAB - 1; i >= 0; i--) - { - t = tree->tptr[i]; - if (t == NULL) - fprintf(f, "NULL found in filled tree!\n"); - else if (t == fillt) - { - } - else if (level < NBYTS - 2) /* more pointer blocks below */ - fillcheck(cm, t, level + 1, f); - } -} - -/* - * dumpchr - print a chr - * - * Kind of char-centric but 
works well enough for debug use. - */ -static void -dumpchr(chr c, - FILE *f) -{ - if (c == '\\') - fprintf(f, "\\\\"); - else if (c > ' ' && c <= '~') - putc((char) c, f); - else - fprintf(f, "\\u%04lx", (long) c); -} - -#endif /* REG_DEBUG */ diff --git a/src/regex/regc_cvec.c b/src/regex/regc_cvec.c deleted file mode 100644 index b6aa8c98f1..0000000000 --- a/src/regex/regc_cvec.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Utility functions for handling cvecs - * This file is #included by regcomp.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $Header$ - * - */ - -/* - * newcvec - allocate a new cvec - */ -static struct cvec * -newcvec(int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ - int nmcces) /* ... and this many MCCEs */ -{ - size_t n; - size_t nc; - struct cvec *cv; - - nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2; - - n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *) - + nc * sizeof(chr); - cv = (struct cvec *) MALLOC(n); - if (cv == NULL) - return NULL; - cv->chrspace = nchrs; - cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE - * ptrs */ - cv->mccespace = nmcces; - cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1); - cv->rangespace = nranges; - return clearcvec(cv); -} - -/* - * clearcvec - clear a possibly-new cvec - * Returns pointer as convenience. - */ -static struct cvec * -clearcvec(struct cvec * cv) -{ - int i; - - assert(cv != NULL); - cv->nchrs = 0; - assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]); - cv->nmcces = 0; - cv->nmccechrs = 0; - cv->nranges = 0; - for (i = 0; i < cv->mccespace; i++) - cv->mcces[i] = NULL; - - return cv; -} - -/* - * addchr - add a chr to a cvec - */ -static void -addchr(struct cvec * cv, /* character vector */ - chr c) /* character to add */ -{ - assert(cv->nchrs < cv->chrspace - cv->nmccechrs); - cv->chrs[cv->nchrs++] = (chr) c; -} - -/* - * addrange - add a range to a cvec - */ -static void -addrange(struct cvec * cv, /* character vector */ - chr from, /* first character of range */ - chr to) /* last character of range */ -{ - assert(cv->nranges < cv->rangespace); - cv->ranges[cv->nranges * 2] = (chr) from; - cv->ranges[cv->nranges * 2 + 1] = (chr) to; - cv->nranges++; -} - -/* - * addmcce - add an MCCE to a cvec - */ -static void -addmcce(struct cvec * cv, /* character vector */ - chr *startp, /* beginning of text */ - chr *endp) /* just past end of text */ -{ - int len; - int i; - chr *s; - chr *d; - - if (startp == NULL && endp == 
NULL) - return; - len = endp - startp; - assert(len > 0); - assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); - assert(cv->nmcces < cv->mccespace); - d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; - cv->mcces[cv->nmcces++] = d; - for (s = startp, i = len; i > 0; s++, i--) - *d++ = *s; - *d++ = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); - cv->nmccechrs += len + 1; -} - -/* - * haschr - does a cvec contain this chr? - */ -static int /* predicate */ -haschr(struct cvec * cv, /* character vector */ - chr c) /* character to test for */ -{ - int i; - chr *p; - - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - { - if (*p == c) - return 1; - } - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) - { - if ((*p <= c) && (c <= *(p + 1))) - return 1; - } - return 0; -} - -/* - * getcvec - get a cvec, remembering it as v->cv - */ -static struct cvec * -getcvec(struct vars * v, /* context */ - int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ - int nmcces) /* ... and this many MCCEs */ -{ - if (v->cv != NULL && nchrs <= v->cv->chrspace && - nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) - return clearcvec(v->cv); - - if (v->cv != NULL) - freecvec(v->cv); - v->cv = newcvec(nchrs, nranges, nmcces); - if (v->cv == NULL) - ERR(REG_ESPACE); - - return v->cv; -} - -/* - * freecvec - free a cvec - */ -static void -freecvec(struct cvec * cv) -{ - FREE(cv); -} diff --git a/src/regex/regc_lex.c b/src/regex/regc_lex.c deleted file mode 100644 index a24290d1a1..0000000000 --- a/src/regex/regc_lex.c +++ /dev/null @@ -1,1146 +0,0 @@ -/* - * lexical analyzer - * This file is #included by regcomp.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. 
The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $Header$ - * - */ - -/* scanning macros (know about v) */ -#define ATEOS() (v->now >= v->stop) -#define HAVE(n) (v->stop - v->now >= (n)) -#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) -#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) -#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ - *(v->now+1) == CHR(b) && \ - *(v->now+2) == CHR(c)) -#define SET(c) (v->nexttype = (c)) -#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) -#define RET(c) return (SET(c), 1) -#define RETV(c, n) return (SETV(c, n), 1) -#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ -#define LASTTYPE(t) (v->lasttype == (t)) - -/* lexical contexts */ -#define L_ERE 1 /* mainline ERE/ARE */ -#define L_BRE 2 /* mainline BRE */ -#define L_Q 3 /* REG_QUOTE */ -#define L_EBND 4 /* ERE/ARE bound */ -#define L_BBND 5 /* BRE bound */ -#define L_BRACK 6 /* brackets */ -#define L_CEL 7 /* collating element */ -#define L_ECL 8 /* equivalence class */ -#define L_CCL 9 /* character class */ -#define INTOCON(c) (v->lexcon = (c)) -#define INCON(con) (v->lexcon == (con)) - -/* construct pointer past end of chr array */ -#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) - -/* - * lexstart - set up lexical stuff, scan leading options - */ -static void -lexstart(struct vars * v) -{ - prefixes(v); /* may turn on new type bits etc. 
*/ - NOERR(); - - if (v->cflags & REG_QUOTE) - { - assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))); - INTOCON(L_Q); - } - else if (v->cflags & REG_EXTENDED) - { - assert(!(v->cflags & REG_QUOTE)); - INTOCON(L_ERE); - } - else - { - assert(!(v->cflags & (REG_QUOTE | REG_ADVF))); - INTOCON(L_BRE); - } - - v->nexttype = EMPTY; /* remember we were at the start */ - next(v); /* set up the first token */ -} - -/* - * prefixes - implement various special prefixes - */ -static void -prefixes(struct vars * v) -{ - /* literal string doesn't get any of this stuff */ - if (v->cflags & REG_QUOTE) - return; - - /* initial "***" gets special things */ - if (HAVE(4) && NEXT3('*', '*', '*')) - switch (*(v->now + 3)) - { - case CHR('?'): /* "***?" error, msg shows version */ - ERR(REG_BADPAT); - return; /* proceed no further */ - break; - case CHR('='): /* "***=" shifts to literal string */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_QUOTE; - v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE); - v->now += 4; - return; /* and there can be no more prefixes */ - break; - case CHR(':'): /* "***:" shifts to AREs */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_ADVANCED; - v->now += 4; - break; - default: /* otherwise *** is just an error */ - ERR(REG_BADRPT); - return; - break; - } - - /* BREs and EREs don't get embedded options */ - if ((v->cflags & REG_ADVANCED) != REG_ADVANCED) - return; - - /* embedded options (AREs only) */ - if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) - { - NOTE(REG_UNONPOSIX); - v->now += 2; - for (; !ATEOS() && iscalpha(*v->now); v->now++) - switch (*v->now) - { - case CHR('b'): /* BREs (but why???) 
*/ - v->cflags &= ~(REG_ADVANCED | REG_QUOTE); - break; - case CHR('c'): /* case sensitive */ - v->cflags &= ~REG_ICASE; - break; - case CHR('e'): /* plain EREs */ - v->cflags |= REG_EXTENDED; - v->cflags &= ~(REG_ADVF | REG_QUOTE); - break; - case CHR('i'): /* case insensitive */ - v->cflags |= REG_ICASE; - break; - case CHR('m'): /* Perloid synonym for n */ - case CHR('n'): /* \n affects ^ $ . [^ */ - v->cflags |= REG_NEWLINE; - break; - case CHR('p'): /* ~Perl, \n affects . [^ */ - v->cflags |= REG_NLSTOP; - v->cflags &= ~REG_NLANCH; - break; - case CHR('q'): /* literal string */ - v->cflags |= REG_QUOTE; - v->cflags &= ~REG_ADVANCED; - break; - case CHR('s'): /* single line, \n ordinary */ - v->cflags &= ~REG_NEWLINE; - break; - case CHR('t'): /* tight syntax */ - v->cflags &= ~REG_EXPANDED; - break; - case CHR('w'): /* weird, \n affects ^ $ only */ - v->cflags &= ~REG_NLSTOP; - v->cflags |= REG_NLANCH; - break; - case CHR('x'): /* expanded syntax */ - v->cflags |= REG_EXPANDED; - break; - default: - ERR(REG_BADOPT); - return; - } - if (!NEXT1(')')) - { - ERR(REG_BADOPT); - return; - } - v->now++; - if (v->cflags & REG_QUOTE) - v->cflags &= ~(REG_EXPANDED | REG_NEWLINE); - } -} - -/* - * lexnest - "call a subroutine", interpolating string at the lexical level - * - * Note, this is not a very general facility. There are a number of - * implicit assumptions about what sorts of strings can be subroutines. 
- */ -static void -lexnest(struct vars * v, - chr *beginp, /* start of interpolation */ - chr *endp) /* one past end of interpolation */ -{ - assert(v->savenow == NULL); /* only one level of nesting */ - v->savenow = v->now; - v->savestop = v->stop; - v->now = beginp; - v->stop = endp; -} - -/* - * string constants to interpolate as expansions of things like \d - */ -static chr backd[] = { /* \d */ - CHR('['), CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr backD[] = { /* \D */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr brbackd[] = { /* \d within brackets */ - CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']') -}; -static chr backs[] = { /* \s */ - CHR('['), CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr backS[] = { /* \S */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr brbacks[] = { /* \s within brackets */ - CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']') -}; -static chr backw[] = { /* \w */ - CHR('['), CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']') -}; -static chr backW[] = { /* \W */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']') -}; -static chr brbackw[] = { /* \w within brackets */ - CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_') -}; - -/* - * lexword - interpolate a bracket expression for word characters - * Possibly ought to inquire whether there is a "word" character class. 
- */ -static void -lexword(struct vars * v) -{ - lexnest(v, backw, ENDOF(backw)); -} - -/* - * next - get next token - */ -static int /* 1 normal, 0 failure */ -next(struct vars * v) -{ - chr c; - - /* errors yield an infinite sequence of failures */ - if (ISERR()) - return 0; /* the error has set nexttype to EOS */ - - /* remember flavor of last token */ - v->lasttype = v->nexttype; - - /* REG_BOSONLY */ - if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY)) - { - /* at start of a REG_BOSONLY RE */ - RETV(SBEGIN, 0); /* same as \A */ - } - - /* if we're nested and we've hit end, return to outer level */ - if (v->savenow != NULL && ATEOS()) - { - v->now = v->savenow; - v->stop = v->savestop; - v->savenow = v->savestop = NULL; - } - - /* skip white space etc. if appropriate (not in literal or []) */ - if (v->cflags & REG_EXPANDED) - switch (v->lexcon) - { - case L_ERE: - case L_BRE: - case L_EBND: - case L_BBND: - skip(v); - break; - } - - /* handle EOS, depending on context */ - if (ATEOS()) - { - switch (v->lexcon) - { - case L_ERE: - case L_BRE: - case L_Q: - RET(EOS); - break; - case L_EBND: - case L_BBND: - FAILW(REG_EBRACE); - break; - case L_BRACK: - case L_CEL: - case L_ECL: - case L_CCL: - FAILW(REG_EBRACK); - break; - } - assert(NOTREACHED); - } - - /* okay, time to actually get a character */ - c = *v->now++; - - /* deal with the easy contexts, punt EREs to code below */ - switch (v->lexcon) - { - case L_BRE: /* punt BREs to separate function */ - return brenext(v, c); - break; - case L_ERE: /* see below */ - break; - case L_Q: /* literal strings are easy */ - RETV(PLAIN, c); - break; - case L_BBND: /* bounds are fairly simple */ - case L_EBND: - switch (c) - { - case CHR('0'): - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - RETV(DIGIT, (chr) DIGITVAL(c)); - break; - case CHR(','): - RET(','); - break; - case CHR('}'): /* ERE bound ends with } */ - 
if (INCON(L_EBND)) - { - INTOCON(L_ERE); - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('}', 0); - } - RETV('}', 1); - } - else - FAILW(REG_BADBR); - break; - case CHR('\\'): /* BRE bound ends with \} */ - if (INCON(L_BBND) && NEXT1('}')) - { - v->now++; - INTOCON(L_BRE); - RET('}'); - } - else - FAILW(REG_BADBR); - break; - default: - FAILW(REG_BADBR); - break; - } - assert(NOTREACHED); - break; - case L_BRACK: /* brackets are not too hard */ - switch (c) - { - case CHR(']'): - if (LASTTYPE('[')) - RETV(PLAIN, c); - else - { - INTOCON((v->cflags & REG_EXTENDED) ? - L_ERE : L_BRE); - RET(']'); - } - break; - case CHR('\\'): - NOTE(REG_UBBS); - if (!(v->cflags & REG_ADVF)) - RETV(PLAIN, c); - NOTE(REG_UNONPOSIX); - if (ATEOS()) - FAILW(REG_EESCAPE); - (DISCARD) lexescape(v); - switch (v->nexttype) - { /* not all escapes okay here */ - case PLAIN: - return 1; - break; - case CCLASS: - switch (v->nextvalue) - { - case 'd': - lexnest(v, brbackd, ENDOF(brbackd)); - break; - case 's': - lexnest(v, brbacks, ENDOF(brbacks)); - break; - case 'w': - lexnest(v, brbackw, ENDOF(brbackw)); - break; - default: - FAILW(REG_EESCAPE); - break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); - break; - } - /* not one of the acceptable escapes */ - FAILW(REG_EESCAPE); - break; - case CHR('-'): - if (LASTTYPE('[') || NEXT1(']')) - RETV(PLAIN, c); - else - RETV(RANGE, c); - break; - case CHR('['): - if (ATEOS()) - FAILW(REG_EBRACK); - switch (*v->now++) - { - case CHR('.'): - INTOCON(L_CEL); - /* might or might not be locale-specific */ - RET(COLLEL); - break; - case CHR('='): - INTOCON(L_ECL); - NOTE(REG_ULOCALE); - RET(ECLASS); - break; - case CHR(':'): - INTOCON(L_CCL); - NOTE(REG_ULOCALE); - RET(CCLASS); - break; - default: /* oops */ - v->now--; - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); - break; - default: - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); - break; - case L_CEL: 
/* collating elements are easy */ - if (c == CHR('.') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, '.'); - } - else - RETV(PLAIN, c); - break; - case L_ECL: /* ditto equivalence classes */ - if (c == CHR('=') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, '='); - } - else - RETV(PLAIN, c); - break; - case L_CCL: /* ditto character classes */ - if (c == CHR(':') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, ':'); - } - else - RETV(PLAIN, c); - break; - default: - assert(NOTREACHED); - break; - } - - /* that got rid of everything except EREs and AREs */ - assert(INCON(L_ERE)); - - /* deal with EREs and AREs, except for backslashes */ - switch (c) - { - case CHR('|'): - RET('|'); - break; - case CHR('*'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('*', 0); - } - RETV('*', 1); - break; - case CHR('+'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('+', 0); - } - RETV('+', 1); - break; - case CHR('?'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('?', 0); - } - RETV('?', 1); - break; - case CHR('{'): /* bounds start or plain character */ - if (v->cflags & REG_EXPANDED) - skip(v); - if (ATEOS() || !iscdigit(*v->now)) - { - NOTE(REG_UBRACES); - NOTE(REG_UUNSPEC); - RETV(PLAIN, c); - } - else - { - NOTE(REG_UBOUNDS); - INTOCON(L_EBND); - RET('{'); - } - assert(NOTREACHED); - break; - case CHR('('): /* parenthesis, or advanced extension */ - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - NOTE(REG_UNONPOSIX); - v->now++; - switch (*v->now++) - { - case CHR(':'): /* non-capturing paren */ - RETV('(', 0); - break; - case CHR('#'): /* comment */ - while (!ATEOS() && *v->now != CHR(')')) - v->now++; - if (!ATEOS()) - v->now++; - assert(v->nexttype == v->lasttype); - return next(v); - break; - case CHR('='): /* positive lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 1); - break; - case 
CHR('!'): /* negative lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 0); - break; - default: - FAILW(REG_BADRPT); - break; - } - assert(NOTREACHED); - } - if (v->cflags & REG_NOSUB) - RETV('(', 0); /* all parens non-capturing */ - else - RETV('(', 1); - break; - case CHR(')'): - if (LASTTYPE('(')) - NOTE(REG_UUNSPEC); - RETV(')', c); - break; - case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ - if (HAVE(6) && *(v->now + 0) == CHR('[') && - *(v->now + 1) == CHR(':') && - (*(v->now + 2) == CHR('<') || - *(v->now + 2) == CHR('>')) && - *(v->now + 3) == CHR(':') && - *(v->now + 4) == CHR(']') && - *(v->now + 5) == CHR(']')) - { - c = *(v->now + 2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? '<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) - { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - RET('^'); - break; - case CHR('$'): - RET('$'); - break; - case CHR('\\'): /* mostly punt backslashes to code below */ - if (ATEOS()) - FAILW(REG_EESCAPE); - break; - default: /* ordinary character */ - RETV(PLAIN, c); - break; - } - - /* ERE/ARE backslash handling; backslash already eaten */ - assert(!ATEOS()); - if (!(v->cflags & REG_ADVF)) - { /* only AREs have non-trivial escapes */ - if (iscalnum(*v->now)) - { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, *v->now++); - } - (DISCARD) lexescape(v); - if (ISERR()) - FAILW(REG_EESCAPE); - if (v->nexttype == CCLASS) - { /* fudge at lexical level */ - switch (v->nextvalue) - { - case 'd': - lexnest(v, backd, ENDOF(backd)); - break; - case 'D': - lexnest(v, backD, ENDOF(backD)); - break; - case 's': - lexnest(v, backs, ENDOF(backs)); - break; - case 'S': - lexnest(v, backS, ENDOF(backS)); - break; - case 'w': - lexnest(v, backw, ENDOF(backw)); - break; - case 'W': - lexnest(v, backW, ENDOF(backW)); - break; - default: - assert(NOTREACHED); - FAILW(REG_ASSERT); - break; - } - /* lexnest done, back up and try again */ - 
v->nexttype = v->lasttype; - return next(v); - } - /* otherwise, lexescape has already done the work */ - return !ISERR(); -} - -/* - * lexescape - parse an ARE backslash escape (backslash already eaten) - * Note slightly nonstandard use of the CCLASS type code. - */ -static int /* not actually used, but convenient for - * RETV */ -lexescape(struct vars * v) -{ - chr c; - static chr alert[] = { - CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') - }; - static chr esc[] = { - CHR('E'), CHR('S'), CHR('C') - }; - chr *save; - - assert(v->cflags & REG_ADVF); - - assert(!ATEOS()); - c = *v->now++; - if (!iscalnum(c)) - RETV(PLAIN, c); - - NOTE(REG_UNONPOSIX); - switch (c) - { - case CHR('a'): - RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); - break; - case CHR('A'): - RETV(SBEGIN, 0); - break; - case CHR('b'): - RETV(PLAIN, CHR('\b')); - break; - case CHR('B'): - RETV(PLAIN, CHR('\\')); - break; - case CHR('c'): - NOTE(REG_UUNPORT); - if (ATEOS()) - FAILW(REG_EESCAPE); - RETV(PLAIN, (chr) (*v->now++ & 037)); - break; - case CHR('d'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'd'); - break; - case CHR('D'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'D'); - break; - case CHR('e'): - NOTE(REG_UUNPORT); - RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); - break; - case CHR('f'): - RETV(PLAIN, CHR('\f')); - break; - case CHR('m'): - RET('<'); - break; - case CHR('M'): - RET('>'); - break; - case CHR('n'): - RETV(PLAIN, CHR('\n')); - break; - case CHR('r'): - RETV(PLAIN, CHR('\r')); - break; - case CHR('s'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 's'); - break; - case CHR('S'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'S'); - break; - case CHR('t'): - RETV(PLAIN, CHR('\t')); - break; - case CHR('u'): - c = lexdigits(v, 16, 4, 4); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('U'): - c = lexdigits(v, 16, 8, 8); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('v'): - RETV(PLAIN, CHR('\v')); - break; - case CHR('w'): - 
NOTE(REG_ULOCALE); - RETV(CCLASS, 'w'); - break; - case CHR('W'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'W'); - break; - case CHR('x'): - NOTE(REG_UUNPORT); - c = lexdigits(v, 16, 1, 255); /* REs >255 long outside - * spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('y'): - NOTE(REG_ULOCALE); - RETV(WBDRY, 0); - break; - case CHR('Y'): - NOTE(REG_ULOCALE); - RETV(NWBDRY, 0); - break; - case CHR('Z'): - RETV(SEND, 0); - break; - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - save = v->now; - v->now--; /* put first digit back */ - c = lexdigits(v, 10, 1, 255); /* REs >255 long outside - * spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - /* ugly heuristic (first test is "exactly 1 digit?") */ - if (v->now - save == 0 || (int) c <= v->nsubexp) - { - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) c); - } - /* oops, doesn't look like it's a backref after all... */ - v->now = save; - /* and fall through into octal number */ - case CHR('0'): - NOTE(REG_UUNPORT); - v->now--; /* put first digit back */ - c = lexdigits(v, 8, 1, 3); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - default: - assert(iscalpha(c)); - FAILW(REG_EESCAPE); /* unknown alphabetic escape */ - break; - } - assert(NOTREACHED); -} - -/* - * lexdigits - slurp up digits and return chr value - */ -static chr /* chr value; errors signalled via ERR */ -lexdigits(struct vars * v, - int base, - int minlen, - int maxlen) -{ - uchr n; /* unsigned to avoid overflow misbehavior */ - int len; - chr c; - int d; - const uchr ub = (uchr) base; - - n = 0; - for (len = 0; len < maxlen && !ATEOS(); len++) - { - c = *v->now++; - switch (c) - { - case CHR('0'): - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - d = DIGITVAL(c); - break; - case CHR('a'): - case CHR('A'): - d = 
10; - break; - case CHR('b'): - case CHR('B'): - d = 11; - break; - case CHR('c'): - case CHR('C'): - d = 12; - break; - case CHR('d'): - case CHR('D'): - d = 13; - break; - case CHR('e'): - case CHR('E'): - d = 14; - break; - case CHR('f'): - case CHR('F'): - d = 15; - break; - default: - v->now--; /* oops, not a digit at all */ - d = -1; - break; - } - - if (d >= base) - { /* not a plausible digit */ - v->now--; - d = -1; - } - if (d < 0) - break; /* NOTE BREAK OUT */ - n = n * ub + (uchr) d; - } - if (len < minlen) - ERR(REG_EESCAPE); - - return (chr) n; -} - -/* - * brenext - get next BRE token - * - * This is much like EREs except for all the stupid backslashes and the - * context-dependency of some things. - */ -static int /* 1 normal, 0 failure */ -brenext(struct vars * v, - chr pc) -{ - chr c = (chr) pc; - - switch (c) - { - case CHR('*'): - if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) - RETV(PLAIN, c); - RET('*'); - break; - case CHR('['): - if (HAVE(6) && *(v->now + 0) == CHR('[') && - *(v->now + 1) == CHR(':') && - (*(v->now + 2) == CHR('<') || - *(v->now + 2) == CHR('>')) && - *(v->now + 3) == CHR(':') && - *(v->now + 4) == CHR(']') && - *(v->now + 5) == CHR(']')) - { - c = *(v->now + 2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? 
'<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) - { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - if (LASTTYPE(EMPTY)) - RET('^'); - if (LASTTYPE('(')) - { - NOTE(REG_UUNSPEC); - RET('^'); - } - RETV(PLAIN, c); - break; - case CHR('$'): - if (v->cflags & REG_EXPANDED) - skip(v); - if (ATEOS()) - RET('$'); - if (NEXT2('\\', ')')) - { - NOTE(REG_UUNSPEC); - RET('$'); - } - RETV(PLAIN, c); - break; - case CHR('\\'): - break; /* see below */ - default: - RETV(PLAIN, c); - break; - } - - assert(c == CHR('\\')); - - if (ATEOS()) - FAILW(REG_EESCAPE); - - c = *v->now++; - switch (c) - { - case CHR('{'): - INTOCON(L_BBND); - NOTE(REG_UBOUNDS); - RET('{'); - break; - case CHR('('): - RETV('(', 1); - break; - case CHR(')'): - RETV(')', c); - break; - case CHR('<'): - NOTE(REG_UNONPOSIX); - RET('<'); - break; - case CHR('>'): - NOTE(REG_UNONPOSIX); - RET('>'); - break; - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) DIGITVAL(c)); - break; - default: - if (iscalnum(c)) - { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, c); - break; - } - - assert(NOTREACHED); -} - -/* - * skip - skip white space and comments in expanded form - */ -static void -skip(struct vars * v) -{ - chr *start = v->now; - - assert(v->cflags & REG_EXPANDED); - - for (;;) - { - while (!ATEOS() && iscspace(*v->now)) - v->now++; - if (ATEOS() || *v->now != CHR('#')) - break; /* NOTE BREAK OUT */ - assert(NEXT1('#')); - while (!ATEOS() && *v->now != CHR('\n')) - v->now++; - /* leave the newline to be picked up by the iscspace loop */ - } - - if (v->now != start) - NOTE(REG_UNONPOSIX); -} - -/* - * newline - return the chr for a newline - * - * This helps confine use of CHR to this source file. 
- */ -static chr -newline(void) -{ - return CHR('\n'); -} - -/* - * chrnamed - return the chr known by a given (chr string) name - * - * The code is a bit clumsy, but this routine gets only such specialized - * use that it hardly matters. - */ -static chr -chrnamed(struct vars * v, - chr *startp, /* start of name */ - chr *endp, /* just past end of name */ - chr lastresort) /* what to return if name lookup fails */ -{ - celt c; - int errsave; - int e; - struct cvec *cv; - - errsave = v->err; - v->err = 0; - c = element(v, startp, endp); - e = v->err; - v->err = errsave; - - if (e != 0) - return (chr) lastresort; - - cv = range(v, c, c, 0); - if (cv->nchrs == 0) - return (chr) lastresort; - return cv->chrs[0]; -} diff --git a/src/regex/regc_locale.c b/src/regex/regc_locale.c deleted file mode 100644 index 4e13b8488b..0000000000 --- a/src/regex/regc_locale.c +++ /dev/null @@ -1,838 +0,0 @@ -/* - * regc_locale.c -- - * - * This file contains locale-specific regexp routines. - * This file is #included by regcomp.c. - * - * Copyright (c) 1998 by Scriptics Corporation. - * - * This software is copyrighted by the Regents of the University of - * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState - * Corporation and other parties. The following terms apply to all files - * associated with the software unless explicitly disclaimed in - * individual files. - * - * The authors hereby grant permission to use, copy, modify, distribute, - * and license this software and its documentation for any purpose, provided - * that existing copyright notices are retained in all copies and that this - * notice is included verbatim in any distributions. No written agreement, - * license, or royalty fee is required for any of the authorized uses. 
- * Modifications to this software may be copyrighted by their authors - * and need not follow the licensing terms described here, provided that - * the new terms are clearly indicated on the first page of each file where - * they apply. - * - * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY - * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY - * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE - * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE - * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR - * MODIFICATIONS. - * - * GOVERNMENT USE: If you are acquiring this software on behalf of the - * U.S. government, the Government shall have only "Restricted Rights" - * in the software and related documentation as defined in the Federal - * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you - * are acquiring the software on behalf of the Department of Defense, the - * software shall be classified as "Commercial Computer Software" and the - * Government shall have only "Restricted Rights" as defined in Clause - * 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the - * authors grant the U.S. Government and others acting in its behalf - * permission to use and distribute the software in accordance with the - * terms specified in this license. 
- * - * $Header$ - */ - -int char_and_wchar_strncmp (const char* cp, const wx_wchar* wp, size_t nNum) -{ - while(*cp++ == (const char)*wp++ && --nNum){} - - return nNum; -} - -/* ASCII character-name table */ - -static struct cname -{ - char *name; - char code; -} cnames[] = - -{ - { - "NUL", '\0' - }, - { - "SOH", '\001' - }, - { - "STX", '\002' - }, - { - "ETX", '\003' - }, - { - "EOT", '\004' - }, - { - "ENQ", '\005' - }, - { - "ACK", '\006' - }, - { - "BEL", '\007' - }, - { - "alert", '\007' - }, - { - "BS", '\010' - }, - { - "backspace", '\b' - }, - { - "HT", '\011' - }, - { - "tab", '\t' - }, - { - "LF", '\012' - }, - { - "newline", '\n' - }, - { - "VT", '\013' - }, - { - "vertical-tab", '\v' - }, - { - "FF", '\014' - }, - { - "form-feed", '\f' - }, - { - "CR", '\015' - }, - { - "carriage-return", '\r' - }, - { - "SO", '\016' - }, - { - "SI", '\017' - }, - { - "DLE", '\020' - }, - { - "DC1", '\021' - }, - { - "DC2", '\022' - }, - { - "DC3", '\023' - }, - { - "DC4", '\024' - }, - { - "NAK", '\025' - }, - { - "SYN", '\026' - }, - { - "ETB", '\027' - }, - { - "CAN", '\030' - }, - { - "EM", '\031' - }, - { - "SUB", '\032' - }, - { - "ESC", '\033' - }, - { - "IS4", '\034' - }, - { - "FS", '\034' - }, - { - "IS3", '\035' - }, - { - "GS", '\035' - }, - { - "IS2", '\036' - }, - { - "RS", '\036' - }, - { - "IS1", '\037' - }, - { - "US", '\037' - }, - { - "space", ' ' - }, - { - "exclamation-mark", '!' - }, - { - "quotation-mark", '"' - }, - { - "number-sign", '#' - }, - { - "dollar-sign", '$' - }, - { - "percent-sign", '%' - }, - { - "ampersand", '&' - }, - { - "apostrophe", '\'' - }, - { - "left-parenthesis", '(' - }, - { - "right-parenthesis", ')' - }, - { - "asterisk", '*' - }, - { - "plus-sign", '+' - }, - { - "comma", ',' - }, - { - "hyphen", '-' - }, - { - "hyphen-minus", '-' - }, - { - "period", '.' - }, - { - "full-stop", '.' 
- }, - { - "slash", '/' - }, - { - "solidus", '/' - }, - { - "zero", '0' - }, - { - "one", '1' - }, - { - "two", '2' - }, - { - "three", '3' - }, - { - "four", '4' - }, - { - "five", '5' - }, - { - "six", '6' - }, - { - "seven", '7' - }, - { - "eight", '8' - }, - { - "nine", '9' - }, - { - "colon", ':' - }, - { - "semicolon", ';' - }, - { - "less-than-sign", '<' - }, - { - "equals-sign", '=' - }, - { - "greater-than-sign", '>' - }, - { - "question-mark", '?' - }, - { - "commercial-at", '@' - }, - { - "left-square-bracket", '[' - }, - { - "backslash", '\\' - }, - { - "reverse-solidus", '\\' - }, - { - "right-square-bracket", ']' - }, - { - "circumflex", '^' - }, - { - "circumflex-accent", '^' - }, - { - "underscore", '_' - }, - { - "low-line", '_' - }, - { - "grave-accent", '`' - }, - { - "left-brace", '{' - }, - { - "left-curly-bracket", '{' - }, - { - "vertical-line", '|' - }, - { - "right-brace", '}' - }, - { - "right-curly-bracket", '}' - }, - { - "tilde", '~' - }, - { - "DEL", '\177' - }, - { - NULL, 0 - } -}; - -/* - * some ctype functions with non-ascii-char guard - */ -static int -wx_isdigit(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && isdigit((unsigned char) c)); -} - -static int -wx_isalpha(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && isalpha((unsigned char) c)); -} - -static int -wx_isalnum(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && isalnum((unsigned char) c)); -} - -static int -wx_isupper(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && isupper((unsigned char) c)); -} - -static int -wx_islower(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && islower((unsigned char) c)); -} - -static int -wx_isgraph(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && isgraph((unsigned char) c)); -} - -static int -wx_ispunct(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && ispunct((unsigned char) c)); -} - -static int -wx_isspace(wx_wchar c) -{ - return (c >= 0 && c <= UCHAR_MAX && isspace((unsigned char) c)); -} - -static wx_wchar 
-wx_toupper(wx_wchar c) -{ - if (c >= 0 && c <= UCHAR_MAX) - return toupper((unsigned char) c); - return c; -} - -static wx_wchar -wx_tolower(wx_wchar c) -{ - if (c >= 0 && c <= UCHAR_MAX) - return tolower((unsigned char) c); - return c; -} - - -/* - * nmcces - how many distinct MCCEs are there? - */ -static int -nmcces(struct vars * v) -{ - /* - * No multi-character collating elements defined at the moment. - */ - return 0; -} - -/* - * nleaders - how many chrs can be first chrs of MCCEs? - */ -static int -nleaders(struct vars * v) -{ - return 0; -} - -/* - * allmcces - return a cvec with all the MCCEs of the locale - */ -static struct cvec * -allmcces(struct vars * v, /* context */ - struct cvec * cv) /* this is supposed to have enough room */ -{ - return clearcvec(cv); -} - -/* - * element - map collating-element name to celt - */ -static celt -element(struct vars * v, /* context */ - chr *startp, /* points to start of name */ - chr *endp) /* points just past end of name */ -{ - struct cname *cn; - size_t len; - - /* generic: one-chr names stand for themselves */ - assert(startp < endp); - len = endp - startp; - if (len == 1) - return *startp; - - NOTE(REG_ULOCALE); - - /* search table */ - for (cn = cnames; cn->name != NULL; cn++) - { - if (strlen(cn->name) == len && - char_and_wchar_strncmp(cn->name, startp, len) == 0) - { - break; /* NOTE BREAK OUT */ - } - } - if (cn->name != NULL) - return CHR(cn->code); - - /* couldn't find it */ - ERR(REG_ECOLLATE); - return 0; -} - -/* - * range - supply cvec for a range, including legality check - */ -static struct cvec * -range(struct vars * v, /* context */ - celt a, /* range start */ - celt b, /* range end, might equal a */ - int cases) /* case-independent? 
*/ -{ - int nchrs; - struct cvec *cv; - celt c, - lc, - uc; - - if (a != b && !before(a, b)) - { - ERR(REG_ERANGE); - return NULL; - } - - if (!cases) - { /* easy version */ - cv = getcvec(v, 0, 1, 0); - NOERRN(); - addrange(cv, a, b); - return cv; - } - - /* - * When case-independent, it's hard to decide when cvec ranges are - * usable, so for now at least, we won't try. We allocate enough - * space for two case variants plus a little extra for the two title - * case variants. - */ - - nchrs = (b - a + 1) * 2 + 4; - - cv = getcvec(v, nchrs, 0, 0); - NOERRN(); - - for (c = a; c <= b; c++) - { - addchr(cv, c); - lc = wx_tolower((chr) c); - if (c != lc) - addchr(cv, lc); - uc = wx_toupper((chr) c); - if (c != uc) - addchr(cv, uc); - } - - return cv; -} - -/* - * before - is celt x before celt y, for purposes of range legality? - */ -static int /* predicate */ -before(celt x, celt y) -{ - /* trivial because no MCCEs */ - if (x < y) - return 1; - return 0; -} - -/* - * eclass - supply cvec for an equivalence class - * Must include case counterparts on request. - */ -static struct cvec * -eclass(struct vars * v, /* context */ - celt c, /* Collating element representing the - * equivalence class. */ - int cases) /* all cases? */ -{ - struct cvec *cv; - - /* crude fake equivalence class for testing */ - if ((v->cflags & REG_FAKE) && c == 'x') - { - cv = getcvec(v, 4, 0, 0); - addchr(cv, (chr) 'x'); - addchr(cv, (chr) 'y'); - if (cases) - { - addchr(cv, (chr) 'X'); - addchr(cv, (chr) 'Y'); - } - return cv; - } - - /* otherwise, none */ - if (cases) - return allcases(v, c); - cv = getcvec(v, 1, 0, 0); - assert(cv != NULL); - addchr(cv, (chr) c); - return cv; -} - -/* - * cclass - supply cvec for a character class - * - * Must include case counterparts on request. - */ -static struct cvec * -cclass(struct vars * v, /* context */ - chr *startp, /* where the name starts */ - chr *endp, /* just past the end of the name */ - int cases) /* case-independent? 
*/ -{ - size_t len; - struct cvec *cv = NULL; - char **namePtr; - int i, - index; - - /* - * The following arrays define the valid character class names. - */ - - static char *classNames[] = { - "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", - "lower", "print", "punct", "space", "upper", "xdigit", NULL - }; - - enum classes - { - CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, - CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT - }; - - /* - * Map the name to the corresponding enumerated value. - */ - len = endp - startp; - index = -1; - for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++) - { - if (strlen(*namePtr) == len && - char_and_wchar_strncmp(*namePtr, startp, len) == 0) - { - index = i; - break; - } - } - if (index == -1) - { - ERR(REG_ECTYPE); - return NULL; - } - - /* - * Remap lower and upper to alpha if the match is case insensitive. - */ - - if (cases && - ((enum classes) index == CC_LOWER || - (enum classes) index == CC_UPPER)) - index = (int) CC_ALPHA; - - /* - * Now compute the character class contents. - * - * For the moment, assume that only char codes < 256 can be in these - * classes. 
- */ - - switch ((enum classes) index) - { - case CC_PRINT: - case CC_ALNUM: - cv = getcvec(v, UCHAR_MAX, 1, 0); - if (cv) - { - for (i = 0; i <= UCHAR_MAX; i++) - { - if (wx_isalpha((chr) i)) - addchr(cv, (chr) i); - } - addrange(cv, (chr) '0', (chr) '9'); - } - break; - case CC_ALPHA: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) - { - for (i = 0; i <= UCHAR_MAX; i++) - { - if (wx_isalpha((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_ASCII: - cv = getcvec(v, 0, 1, 0); - if (cv) - addrange(cv, 0, 0x7f); - break; - case CC_BLANK: - cv = getcvec(v, 2, 0, 0); - addchr(cv, '\t'); - addchr(cv, ' '); - break; - case CC_CNTRL: - cv = getcvec(v, 0, 2, 0); - addrange(cv, 0x0, 0x1f); - addrange(cv, 0x7f, 0x9f); - break; - case CC_DIGIT: - cv = getcvec(v, 0, 1, 0); - if (cv) - addrange(cv, (chr) '0', (chr) '9'); - break; - case CC_PUNCT: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) - { - for (i = 0; i <= UCHAR_MAX; i++) - { - if (wx_ispunct((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_XDIGIT: - cv = getcvec(v, 0, 3, 0); - if (cv) - { - addrange(cv, '0', '9'); - addrange(cv, 'a', 'f'); - addrange(cv, 'A', 'F'); - } - break; - case CC_SPACE: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) - { - for (i = 0; i <= UCHAR_MAX; i++) - { - if (wx_isspace((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_LOWER: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) - { - for (i = 0; i <= UCHAR_MAX; i++) - { - if (wx_islower((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_UPPER: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) - { - for (i = 0; i <= UCHAR_MAX; i++) - { - if (wx_isupper((chr) i)) - addchr(cv, (chr) i); - } - } - break; - case CC_GRAPH: - cv = getcvec(v, UCHAR_MAX, 0, 0); - if (cv) - { - for (i = 0; i <= UCHAR_MAX; i++) - { - if (wx_isgraph((chr) i)) - addchr(cv, (chr) i); - } - } - break; - } - if (cv == NULL) - ERR(REG_ESPACE); - return cv; -} - -/* - * allcases - supply cvec for all case counterparts of a chr (including itself) - 
* - * This is a shortcut, preferably an efficient one, for simple characters; - * messy cases are done via range(). - */ -static struct cvec * -allcases(struct vars * v, /* context */ - chr pc) /* character to get case equivs of */ -{ - struct cvec *cv; - chr c = (chr) pc; - chr lc, - uc; - - lc = wx_tolower((chr) c); - uc = wx_toupper((chr) c); - - cv = getcvec(v, 2, 0, 0); - addchr(cv, lc); - if (lc != uc) - addchr(cv, uc); - return cv; -} - -/* - * cmp - chr-substring compare - * - * Backrefs need this. It should preferably be efficient. - * Note that it does not need to report anything except equal/unequal. - * Note also that the length is exact, and the comparison should not - * stop at embedded NULs! - */ -static int /* 0 for equal, nonzero for unequal */ -cmp(const chr *x, const chr *y, /* strings to compare */ - size_t len) /* exact length of comparison */ -{ - return memcmp(VS(x), VS(y), len * sizeof(chr)); -} - -/* - * casecmp - case-independent chr-substring compare - * - * REG_ICASE backrefs need this. It should preferably be efficient. - * Note that it does not need to report anything except equal/unequal. - * Note also that the length is exact, and the comparison should not - * stop at embedded NULs! - */ -static int /* 0 for equal, nonzero for unequal */ -casecmp(const chr *x, const chr *y, /* strings to compare */ - size_t len) /* exact length of comparison */ -{ - for (; len > 0; len--, x++, y++) - { - if ((*x != *y) && (wx_tolower(*x) != wx_tolower(*y))) - return 1; - } - return 0; -} diff --git a/src/regex/regc_nfa.c b/src/regex/regc_nfa.c deleted file mode 100644 index cc9f6ea2f9..0000000000 --- a/src/regex/regc_nfa.c +++ /dev/null @@ -1,1559 +0,0 @@ -/* - * NFA utilities. - * This file is #included by regcomp.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 
- * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Header$ - * - * - * One or two things that technically ought to be in here - * are actually in color.c, thanks to some incestuous relationships in - * the color chains. 
- */ - -#define NISERR() VISERR(nfa->v) -#define NERR(e) VERR(nfa->v, (e)) - - -/* - * newnfa - set up an NFA - */ -static struct nfa * /* the NFA, or NULL */ -newnfa(struct vars * v, - struct colormap * cm, - struct nfa * parent) /* NULL if primary NFA */ -{ - struct nfa *nfa; - - nfa = (struct nfa *) MALLOC(sizeof(struct nfa)); - if (nfa == NULL) - return NULL; - - nfa->states = NULL; - nfa->slast = NULL; - nfa->free = NULL; - nfa->nstates = 0; - nfa->cm = cm; - nfa->v = v; - nfa->bos[0] = nfa->bos[1] = COLORLESS; - nfa->eos[0] = nfa->eos[1] = COLORLESS; - nfa->post = newfstate(nfa, '@'); /* number 0 */ - nfa->pre = newfstate(nfa, '>'); /* number 1 */ - nfa->parent = parent; - - nfa->init = newstate(nfa); /* may become invalid later */ - nfa->final = newstate(nfa); - if (ISERR()) - { - freenfa(nfa); - return NULL; - } - rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); - newarc(nfa, '^', 1, nfa->pre, nfa->init); - newarc(nfa, '^', 0, nfa->pre, nfa->init); - rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); - newarc(nfa, '$', 1, nfa->final, nfa->post); - newarc(nfa, '$', 0, nfa->final, nfa->post); - - if (ISERR()) - { - freenfa(nfa); - return NULL; - } - return nfa; -} - -/* - * freenfa - free an entire NFA - */ -static void -freenfa(struct nfa * nfa) -{ - struct state *s; - - while ((s = nfa->states) != NULL) - { - s->nins = s->nouts = 0; /* don't worry about arcs */ - freestate(nfa, s); - } - while ((s = nfa->free) != NULL) - { - nfa->free = s->next; - destroystate(nfa, s); - } - - nfa->slast = NULL; - nfa->nstates = -1; - nfa->pre = NULL; - nfa->post = NULL; - FREE(nfa); -} - -/* - * newstate - allocate an NFA state, with zero flag value - */ -static struct state * /* NULL on error */ -newstate(struct nfa * nfa) -{ - struct state *s; - - if (nfa->free != NULL) - { - s = nfa->free; - nfa->free = s->next; - } - else - { - s = (struct state *) MALLOC(sizeof(struct state)); - if (s == NULL) - { - NERR(REG_ESPACE); - return NULL; - } - 
s->oas.next = NULL; - s->free = NULL; - s->noas = 0; - } - - assert(nfa->nstates >= 0); - s->no = nfa->nstates++; - s->flag = 0; - if (nfa->states == NULL) - nfa->states = s; - s->nins = 0; - s->ins = NULL; - s->nouts = 0; - s->outs = NULL; - s->tmp = NULL; - s->next = NULL; - if (nfa->slast != NULL) - { - assert(nfa->slast->next == NULL); - nfa->slast->next = s; - } - s->prev = nfa->slast; - nfa->slast = s; - return s; -} - -/* - * newfstate - allocate an NFA state with a specified flag value - */ -static struct state * /* NULL on error */ -newfstate(struct nfa * nfa, int flag) -{ - struct state *s; - - s = newstate(nfa); - if (s != NULL) - s->flag = (char) flag; - return s; -} - -/* - * dropstate - delete a state's inarcs and outarcs and free it - */ -static void -dropstate(struct nfa * nfa, - struct state * s) -{ - struct arc *a; - - while ((a = s->ins) != NULL) - freearc(nfa, a); - while ((a = s->outs) != NULL) - freearc(nfa, a); - freestate(nfa, s); -} - -/* - * freestate - free a state, which has no in-arcs or out-arcs - */ -static void -freestate(struct nfa * nfa, - struct state * s) -{ - assert(s != NULL); - assert(s->nins == 0 && s->nouts == 0); - - s->no = FREESTATE; - s->flag = 0; - if (s->next != NULL) - s->next->prev = s->prev; - else - { - assert(s == nfa->slast); - nfa->slast = s->prev; - } - if (s->prev != NULL) - s->prev->next = s->next; - else - { - assert(s == nfa->states); - nfa->states = s->next; - } - s->prev = NULL; - s->next = nfa->free; /* don't delete it, put it on the free - * list */ - nfa->free = s; -} - -/* - * destroystate - really get rid of an already-freed state - */ -static void -destroystate(struct nfa * nfa, - struct state * s) -{ - struct arcbatch *ab; - struct arcbatch *abnext; - - assert(s->no == FREESTATE); - for (ab = s->oas.next; ab != NULL; ab = abnext) - { - abnext = ab->next; - FREE(ab); - } - s->ins = NULL; - s->outs = NULL; - s->next = NULL; - FREE(s); -} - -/* - * newarc - set up a new arc within an NFA - */ -static 
void -newarc(struct nfa * nfa, - int t, - pcolor co, - struct state * from, - struct state * to) -{ - struct arc *a; - - assert(from != NULL && to != NULL); - - /* check for duplicates */ - for (a = from->outs; a != NULL; a = a->outchain) - if (a->to == to && a->co == co && a->type == t) - return; - - a = allocarc(nfa, from); - if (NISERR()) - return; - assert(a != NULL); - - a->type = t; - a->co = (color) co; - a->to = to; - a->from = from; - - /* - * Put the new arc on the beginning, not the end, of the chains. Not - * only is this easier, it has the very useful side effect that - * deleting the most-recently-added arc is the cheapest case rather - * than the most expensive one. - */ - a->inchain = to->ins; - to->ins = a; - a->outchain = from->outs; - from->outs = a; - - from->nouts++; - to->nins++; - - if (COLORED(a) && nfa->parent == NULL) - colorchain(nfa->cm, a); - - return; -} - -/* - * allocarc - allocate a new out-arc within a state - */ -static struct arc * /* NULL for failure */ -allocarc(struct nfa * nfa, - struct state * s) -{ - struct arc *a; - struct arcbatch *new; - int i; - - /* shortcut */ - if (s->free == NULL && s->noas < ABSIZE) - { - a = &s->oas.a[s->noas]; - s->noas++; - return a; - } - - /* if none at hand, get more */ - if (s->free == NULL) - { - new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); - if (new == NULL) - { - NERR(REG_ESPACE); - return NULL; - } - new->next = s->oas.next; - s->oas.next = new; - - for (i = 0; i < ABSIZE; i++) - { - new->a[i].type = 0; - new->a[i].freechain = &new->a[i + 1]; - } - new->a[ABSIZE - 1].freechain = NULL; - s->free = &new->a[0]; - } - assert(s->free != NULL); - - a = s->free; - s->free = a->freechain; - return a; -} - -/* - * freearc - free an arc - */ -static void -freearc(struct nfa * nfa, - struct arc * victim) -{ - struct state *from = victim->from; - struct state *to = victim->to; - struct arc *a; - - assert(victim->type != 0); - - /* take it off color chain if necessary */ - if 
(COLORED(victim) && nfa->parent == NULL) - uncolorchain(nfa->cm, victim); - - /* take it off source's out-chain */ - assert(from != NULL); - assert(from->outs != NULL); - a = from->outs; - if (a == victim) /* simple case: first in chain */ - from->outs = victim->outchain; - else - { - for (; a != NULL && a->outchain != victim; a = a->outchain) - continue; - assert(a != NULL); - a->outchain = victim->outchain; - } - from->nouts--; - - /* take it off target's in-chain */ - assert(to != NULL); - assert(to->ins != NULL); - a = to->ins; - if (a == victim) /* simple case: first in chain */ - to->ins = victim->inchain; - else - { - for (; a != NULL && a->inchain != victim; a = a->inchain) - continue; - assert(a != NULL); - a->inchain = victim->inchain; - } - to->nins--; - - /* clean up and place on free list */ - victim->type = 0; - victim->from = NULL; /* precautions... */ - victim->to = NULL; - victim->inchain = NULL; - victim->outchain = NULL; - victim->freechain = from->free; - from->free = victim; -} - -/* - * findarc - find arc, if any, from given source with given type and color - * If there is more than one such arc, the result is random. - */ -static struct arc * -findarc(struct state * s, - int type, - pcolor co) -{ - struct arc *a; - - for (a = s->outs; a != NULL; a = a->outchain) - if (a->type == type && a->co == co) - return a; - return NULL; -} - -/* - * cparc - allocate a new arc within an NFA, copying details from old one - */ -static void -cparc(struct nfa * nfa, - struct arc * oa, - struct state * from, - struct state * to) -{ - newarc(nfa, oa->type, oa->co, from, to); -} - -/* - * moveins - move all in arcs of a state to another state - * - * You might think this could be done better by just updating the - * existing arcs, and you would be right if it weren't for the desire - * for duplicate suppression, which makes it easier to just make new - * ones to exploit the suppression built into newarc. 
- */ -static void -moveins(struct nfa * nfa, - struct state * old, - struct state * new) -{ - struct arc *a; - - assert(old != new); - - while ((a = old->ins) != NULL) - { - cparc(nfa, a, a->from, new); - freearc(nfa, a); - } - assert(old->nins == 0); - assert(old->ins == NULL); -} - -/* - * copyins - copy all in arcs of a state to another state - */ -static void -copyins(struct nfa * nfa, - struct state * old, - struct state * new) -{ - struct arc *a; - - assert(old != new); - - for (a = old->ins; a != NULL; a = a->inchain) - cparc(nfa, a, a->from, new); -} - -/* - * moveouts - move all out arcs of a state to another state - */ -static void -moveouts(struct nfa * nfa, - struct state * old, - struct state * new) -{ - struct arc *a; - - assert(old != new); - - while ((a = old->outs) != NULL) - { - cparc(nfa, a, new, a->to); - freearc(nfa, a); - } -} - -/* - * copyouts - copy all out arcs of a state to another state - */ -static void -copyouts(struct nfa * nfa, - struct state * old, - struct state * new) -{ - struct arc *a; - - assert(old != new); - - for (a = old->outs; a != NULL; a = a->outchain) - cparc(nfa, a, new, a->to); -} - -/* - * cloneouts - copy out arcs of a state to another state pair, modifying type - */ -static void -cloneouts(struct nfa * nfa, - struct state * old, - struct state * from, - struct state * to, - int type) -{ - struct arc *a; - - assert(old != from); - - for (a = old->outs; a != NULL; a = a->outchain) - newarc(nfa, type, a->co, from, to); -} - -/* - * delsub - delete a sub-NFA, updating subre pointers if necessary - * - * This uses a recursive traversal of the sub-NFA, marking already-seen - * states using their tmp pointer. - */ -static void -delsub(struct nfa * nfa, - struct state * lp, /* the sub-NFA goes from here... 
*/ - struct state * rp) /* ...to here, *not* inclusive */ -{ - assert(lp != rp); - - rp->tmp = rp; /* mark end */ - - deltraverse(nfa, lp, lp); - assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ - assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ - - rp->tmp = NULL; /* unmark end */ - lp->tmp = NULL; /* and begin, marked by deltraverse */ -} - -/* - * deltraverse - the recursive heart of delsub - * This routine's basic job is to destroy all out-arcs of the state. - */ -static void -deltraverse(struct nfa * nfa, - struct state * leftend, - struct state * s) -{ - struct arc *a; - struct state *to; - - if (s->nouts == 0) - return; /* nothing to do */ - if (s->tmp != NULL) - return; /* already in progress */ - - s->tmp = s; /* mark as in progress */ - - while ((a = s->outs) != NULL) - { - to = a->to; - deltraverse(nfa, leftend, to); - assert(to->nouts == 0 || to->tmp != NULL); - freearc(nfa, a); - if (to->nins == 0 && to->tmp == NULL) - { - assert(to->nouts == 0); - freestate(nfa, to); - } - } - - assert(s->no != FREESTATE); /* we're still here */ - assert(s == leftend || s->nins != 0); /* and still reachable */ - assert(s->nouts == 0); /* but have no outarcs */ - - s->tmp = NULL; /* we're done here */ -} - -/* - * dupnfa - duplicate sub-NFA - * - * Another recursive traversal, this time using tmp to point to duplicates - * as well as mark already-seen states. (You knew there was a reason why - * it's a state pointer, didn't you? 
:-)) - */ -static void -dupnfa(struct nfa * nfa, - struct state * start, /* duplicate of subNFA starting here */ - struct state * stop, /* and stopping here */ - struct state * from, /* stringing duplicate from here */ - struct state * to) /* to here */ -{ - if (start == stop) - { - newarc(nfa, EMPTY, 0, from, to); - return; - } - - stop->tmp = to; - duptraverse(nfa, start, from); - /* done, except for clearing out the tmp pointers */ - - stop->tmp = NULL; - cleartraverse(nfa, start); -} - -/* - * duptraverse - recursive heart of dupnfa - */ -static void -duptraverse(struct nfa * nfa, - struct state * s, - struct state * stmp) /* s's duplicate, or NULL */ -{ - struct arc *a; - - if (s->tmp != NULL) - return; /* already done */ - - s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; - if (s->tmp == NULL) - { - assert(NISERR()); - return; - } - - for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) - { - duptraverse(nfa, a->to, (struct state *) NULL); - assert(a->to->tmp != NULL); - cparc(nfa, a, s->tmp, a->to->tmp); - } -} - -/* - * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set - */ -static void -cleartraverse(struct nfa * nfa, - struct state * s) -{ - struct arc *a; - - if (s->tmp == NULL) - return; - s->tmp = NULL; - - for (a = s->outs; a != NULL; a = a->outchain) - cleartraverse(nfa, a->to); -} - -/* - * specialcolors - fill in special colors for an NFA - */ -static void -specialcolors(struct nfa * nfa) -{ - /* false colors for BOS, BOL, EOS, EOL */ - if (nfa->parent == NULL) - { - nfa->bos[0] = pseudocolor(nfa->cm); - nfa->bos[1] = pseudocolor(nfa->cm); - nfa->eos[0] = pseudocolor(nfa->cm); - nfa->eos[1] = pseudocolor(nfa->cm); - } - else - { - assert(nfa->parent->bos[0] != COLORLESS); - nfa->bos[0] = nfa->parent->bos[0]; - assert(nfa->parent->bos[1] != COLORLESS); - nfa->bos[1] = nfa->parent->bos[1]; - assert(nfa->parent->eos[0] != COLORLESS); - nfa->eos[0] = nfa->parent->eos[0]; - assert(nfa->parent->eos[1] != COLORLESS); - 
nfa->eos[1] = nfa->parent->eos[1]; - } -} - -/* - * optimize - optimize an NFA - */ -static long /* re_info bits */ -optimize(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ -{ -#ifdef REG_DEBUG - int verbose = (f != NULL) ? 1 : 0; - - if (verbose) - fprintf(f, "\ninitial cleanup:\n"); -#endif - cleanup(nfa); /* may simplify situation */ -#ifdef REG_DEBUG - if (verbose) - dumpnfa(nfa, f); - if (verbose) - fprintf(f, "\nempties:\n"); -#endif - fixempties(nfa, f); /* get rid of EMPTY arcs */ -#ifdef REG_DEBUG - if (verbose) - fprintf(f, "\nconstraints:\n"); -#endif - pullback(nfa, f); /* pull back constraints backward */ - pushfwd(nfa, f); /* push fwd constraints forward */ -#ifdef REG_DEBUG - if (verbose) - fprintf(f, "\nfinal cleanup:\n"); -#endif - cleanup(nfa); /* final tidying */ - return analyze(nfa); /* and analysis */ -} - -/* - * pullback - pull back constraints backward to (with luck) eliminate them - */ -static void -pullback(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ -{ - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and pull until there are no more */ - do - { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) - { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) - { - nexta = a->outchain; - if (a->type == '^' || a->type == BEHIND) - if (pull(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); - if (NISERR()) - return; - - for (a = nfa->pre->outs; a != NULL; a = nexta) - { - nexta = a->outchain; - if (a->type == '^') - { - assert(a->co == 0 || a->co == 1); - newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); - freearc(nfa, a); - } - } -} - -/* - * pull - pull a back constraint backward past its source state - * A significant property of this function is that it deletes at most - * one state -- the 
constraint's from state -- and only if the constraint - * was that state's last outarc. - */ -static int /* 0 couldn't, 1 could */ -pull(struct nfa * nfa, - struct arc * con) -{ - struct state *from = con->from; - struct state *to = con->to; - struct arc *a; - struct arc *nexta; - struct state *s; - - if (from == to) - { /* circular constraint is pointless */ - freearc(nfa, con); - return 1; - } - if (from->flag) /* can't pull back beyond start */ - return 0; - if (from->nins == 0) - { /* unreachable */ - freearc(nfa, con); - return 1; - } - - /* first, clone from state if necessary to avoid other outarcs */ - if (from->nouts > 1) - { - s = newstate(nfa); - if (NISERR()) - return 0; - assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ - cparc(nfa, con, s, to); /* move constraint arc */ - freearc(nfa, con); - from = s; - con = from->outs; - } - assert(from->nouts == 1); - - /* propagate the constraint into the from state's inarcs */ - for (a = from->ins; a != NULL; a = nexta) - { - nexta = a->inchain; - switch (combine(con, a)) - { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, a, s, to); /* anticipate move */ - cparc(nfa, con, a->from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; - } - } - - /* remaining inarcs, if any, incorporate the constraint */ - moveins(nfa, from, to); - dropstate(nfa, from); /* will free the constraint */ - return 1; -} - -/* - * pushfwd - push forward constraints forward to (with luck) eliminate them - */ -static void -pushfwd(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ -{ - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and push until there are no more */ - do - { - progress 
= 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) - { - nexts = s->next; - for (a = s->ins; a != NULL && !NISERR(); a = nexta) - { - nexta = a->inchain; - if (a->type == '$' || a->type == AHEAD) - if (push(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); - if (NISERR()) - return; - - for (a = nfa->post->ins; a != NULL; a = nexta) - { - nexta = a->inchain; - if (a->type == '$') - { - assert(a->co == 0 || a->co == 1); - newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); - freearc(nfa, a); - } - } -} - -/* - * push - push a forward constraint forward past its destination state - * A significant property of this function is that it deletes at most - * one state -- the constraint's to state -- and only if the constraint - * was that state's last inarc. - */ -static int /* 0 couldn't, 1 could */ -push(struct nfa * nfa, - struct arc * con) -{ - struct state *from = con->from; - struct state *to = con->to; - struct arc *a; - struct arc *nexta; - struct state *s; - - if (to == from) - { /* circular constraint is pointless */ - freearc(nfa, con); - return 1; - } - if (to->flag) /* can't push forward beyond end */ - return 0; - if (to->nouts == 0) - { /* dead end */ - freearc(nfa, con); - return 1; - } - - /* first, clone to state if necessary to avoid other inarcs */ - if (to->nins > 1) - { - s = newstate(nfa); - if (NISERR()) - return 0; - copyouts(nfa, to, s); /* duplicate outarcs */ - cparc(nfa, con, from, s); /* move constraint */ - freearc(nfa, con); - to = s; - con = to->ins; - } - assert(to->nins == 1); - - /* propagate the constraint into the to state's outarcs */ - for (a = to->outs; a != NULL; a = nexta) - { - nexta = a->outchain; - switch (combine(con, a)) - { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - 
s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, con, s, a->to); /* anticipate move */ - cparc(nfa, a, from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; - } - } - - /* remaining outarcs, if any, incorporate the constraint */ - moveouts(nfa, to, from); - dropstate(nfa, to); /* will free the constraint */ - return 1; -} - -/* - * combine - constraint lands on an arc, what happens? - * - * #def INCOMPATIBLE 1 // destroys arc - * #def SATISFIED 2 // constraint satisfied - * #def COMPATIBLE 3 // compatible but not satisfied yet - */ -static int -combine(struct arc * con, - struct arc * a) -{ -#define CA(ct,at) (((ct)<type, a->type)) - { - case CA('^', PLAIN): /* newlines are handled separately */ - case CA('$', PLAIN): - return INCOMPATIBLE; - break; - case CA(AHEAD, PLAIN): /* color constraints meet colors */ - case CA(BEHIND, PLAIN): - if (con->co == a->co) - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', '^'): /* collision, similar constraints */ - case CA('$', '$'): - case CA(AHEAD, AHEAD): - case CA(BEHIND, BEHIND): - if (con->co == a->co) /* true duplication */ - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', BEHIND): /* collision, dissimilar constraints */ - case CA(BEHIND, '^'): - case CA('$', AHEAD): - case CA(AHEAD, '$'): - return INCOMPATIBLE; - break; - case CA('^', '$'): /* constraints passing each other */ - case CA('^', AHEAD): - case CA(BEHIND, '$'): - case CA(BEHIND, AHEAD): - case CA('$', '^'): - case CA('$', BEHIND): - case CA(AHEAD, '^'): - case CA(AHEAD, BEHIND): - case CA('^', LACON): - case CA(BEHIND, LACON): - case CA('$', LACON): - case CA(AHEAD, LACON): - return COMPATIBLE; - break; - } - assert(NOTREACHED); - return INCOMPATIBLE; /* for benefit of blind compilers */ -} - -/* - * fixempties - get rid of EMPTY arcs - */ -static void -fixempties(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ -{ - struct state *s; - struct state 
*nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and eliminate empties until there are no more */ - do - { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) - { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) - { - nexta = a->outchain; - if (a->type == EMPTY && unempty(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); -} - -/* - * unempty - optimize out an EMPTY arc, if possible - * - * Actually, as it stands this function always succeeds, but the return - * value is kept with an eye on possible future changes. - */ -static int /* 0 couldn't, 1 could */ -unempty(struct nfa * nfa, - struct arc * a) -{ - struct state *from = a->from; - struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ - - assert(a->type == EMPTY); - assert(from != nfa->pre && to != nfa->post); - - if (from == to) - { /* vacuous loop */ - freearc(nfa, a); - return 1; - } - - /* decide which end to work on */ - usefrom = 1; /* default: attack from */ - if (from->nouts > to->nins) - usefrom = 0; - else if (from->nouts == to->nins) - { - /* decide on secondary issue: move/copy fewest arcs */ - if (from->nins > to->nouts) - usefrom = 0; - } - - freearc(nfa, a); - if (usefrom) - { - if (from->nouts == 0) - { - /* was the state's only outarc */ - moveins(nfa, from, to); - freestate(nfa, from); - } - else - copyins(nfa, from, to); - } - else - { - if (to->nins == 0) - { - /* was the state's only inarc */ - moveouts(nfa, to, from); - freestate(nfa, to); - } - else - copyouts(nfa, to, from); - } - - return 1; -} - -/* - * cleanup - clean up NFA after optimizations - */ -static void -cleanup(struct nfa * nfa) -{ - struct state *s; - struct state *nexts; - int n; - - /* clear out unreachable or dead-end states */ - /* use pre to mark reachable, then post to mark can-reach-post */ - 
markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre); - markcanreach(nfa, nfa->post, nfa->pre, nfa->post); - for (s = nfa->states; s != NULL; s = nexts) - { - nexts = s->next; - if (s->tmp != nfa->post && !s->flag) - dropstate(nfa, s); - } - assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); - cleartraverse(nfa, nfa->pre); - assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); - /* the nins==0 (final unreachable) case will be caught later */ - - /* renumber surviving states */ - n = 0; - for (s = nfa->states; s != NULL; s = s->next) - s->no = n++; - nfa->nstates = n; -} - -/* - * markreachable - recursive marking of reachable states - */ -static void -markreachable(struct nfa * nfa, - struct state * s, - struct state * okay, /* consider only states with this - * mark */ - struct state * mark) /* the value to mark with */ -{ - struct arc *a; - - if (s->tmp != okay) - return; - s->tmp = mark; - - for (a = s->outs; a != NULL; a = a->outchain) - markreachable(nfa, a->to, okay, mark); -} - -/* - * markcanreach - recursive marking of states which can reach here - */ -static void -markcanreach(struct nfa * nfa, - struct state * s, - struct state * okay, /* consider only states with this - * mark */ - struct state * mark) /* the value to mark with */ -{ - struct arc *a; - - if (s->tmp != okay) - return; - s->tmp = mark; - - for (a = s->ins; a != NULL; a = a->inchain) - markcanreach(nfa, a->from, okay, mark); -} - -/* - * analyze - ascertain potentially-useful facts about an optimized NFA - */ -static long /* re_info bits to be ORed in */ -analyze(struct nfa * nfa) -{ - struct arc *a; - struct arc *aa; - - if (nfa->pre->outs == NULL) - return REG_UIMPOSSIBLE; - for (a = nfa->pre->outs; a != NULL; a = a->outchain) - for (aa = a->to->outs; aa != NULL; aa = aa->outchain) - if (aa->to == nfa->post) - return REG_UEMPTYMATCH; - return 0; -} - -/* - * compact - compact an NFA - */ -static void -compact(struct nfa * nfa, - struct cnfa * cnfa) -{ - struct state 
*s; - struct arc *a; - size_t nstates; - size_t narcs; - struct carc *ca; - struct carc *first; - - assert(!NISERR()); - - nstates = 0; - narcs = 0; - for (s = nfa->states; s != NULL; s = s->next) - { - nstates++; - narcs += 1 + s->nouts + 1; - /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ - } - - cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); - cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) - { - if (cnfa->states != NULL) - FREE(cnfa->states); - if (cnfa->arcs != NULL) - FREE(cnfa->arcs); - NERR(REG_ESPACE); - return; - } - cnfa->nstates = nstates; - cnfa->pre = nfa->pre->no; - cnfa->post = nfa->post->no; - cnfa->bos[0] = nfa->bos[0]; - cnfa->bos[1] = nfa->bos[1]; - cnfa->eos[0] = nfa->eos[0]; - cnfa->eos[1] = nfa->eos[1]; - cnfa->ncolors = maxcolor(nfa->cm) + 1; - cnfa->flags = 0; - - ca = cnfa->arcs; - for (s = nfa->states; s != NULL; s = s->next) - { - assert((size_t) s->no < nstates); - cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ - ca++; - first = ca; - for (a = s->outs; a != NULL; a = a->outchain) - switch (a->type) - { - case PLAIN: - ca->co = a->co; - ca->to = a->to->no; - ca++; - break; - case LACON: - assert(s->no != cnfa->pre); - ca->co = (color) (cnfa->ncolors + a->co); - ca->to = a->to->no; - ca++; - cnfa->flags |= HASLACONS; - break; - default: - assert(NOTREACHED); - break; - } - carcsort(first, ca - 1); - ca->co = COLORLESS; - ca->to = 0; - ca++; - } - assert(ca == &cnfa->arcs[narcs]); - assert(cnfa->nstates != 0); - - /* mark no-progress states */ - for (a = nfa->pre->outs; a != NULL; a = a->outchain) - cnfa->states[a->to->no]->co = 1; - cnfa->states[nfa->pre->no]->co = 1; -} - -/* - * carcsort - sort compacted-NFA arcs by color - * - * Really dumb algorithm, but if the list is long enough for that to matter, - * you're in real trouble anyway. 
- */ -static void -carcsort(struct carc * first, - struct carc * last) -{ - struct carc *p; - struct carc *q; - struct carc tmp; - - if (last - first <= 1) - return; - - for (p = first; p <= last; p++) - for (q = p; q <= last; q++) - if (p->co > q->co || - (p->co == q->co && p->to > q->to)) - { - assert(p != q); - tmp = *p; - *p = *q; - *q = tmp; - } -} - -/* - * freecnfa - free a compacted NFA - */ -static void -freecnfa(struct cnfa * cnfa) -{ - assert(cnfa->nstates != 0); /* not empty already */ - cnfa->nstates = 0; - FREE(cnfa->states); - FREE(cnfa->arcs); -} - -/* - * dumpnfa - dump an NFA in human-readable form - */ -static void -dumpnfa(struct nfa * nfa, - FILE *f) -{ -#ifdef REG_DEBUG - struct state *s; - - fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); - if (nfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long) nfa->bos[0]); - if (nfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long) nfa->bos[1]); - if (nfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long) nfa->eos[0]); - if (nfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); - fprintf(f, "\n"); - for (s = nfa->states; s != NULL; s = s->next) - dumpstate(s, f); - if (nfa->parent == NULL) - dumpcolors(nfa->cm, f); - fflush(f); -#endif -} - -#ifdef REG_DEBUG /* subordinates of dumpnfa */ - -/* - * dumpstate - dump an NFA state in human-readable form - */ -static void -dumpstate(struct state * s, - FILE *f) -{ - struct arc *a; - - fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", - (s->flag) ? 
s->flag : '.'); - if (s->prev != NULL && s->prev->next != s) - fprintf(f, "\tstate chain bad\n"); - if (s->nouts == 0) - fprintf(f, "\tno out arcs\n"); - else - dumparcs(s, f); - fflush(f); - for (a = s->ins; a != NULL; a = a->inchain) - { - if (a->to != s) - fprintf(f, "\tlink from %d to %d on %d's in-chain\n", - a->from->no, a->to->no, s->no); - } -} - -/* - * dumparcs - dump out-arcs in human-readable form - */ -static void -dumparcs(struct state * s, - FILE *f) -{ - int pos; - - assert(s->nouts > 0); - /* printing arcs in reverse order is usually clearer */ - pos = dumprarcs(s->outs, s, f, 1); - if (pos != 1) - fprintf(f, "\n"); -} - -/* - * dumprarcs - dump remaining outarcs, recursively, in reverse order - */ -static int /* resulting print position */ -dumprarcs(struct arc * a, - struct state * s, - FILE *f, - int pos) /* initial print position */ -{ - if (a->outchain != NULL) - pos = dumprarcs(a->outchain, s, f, pos); - dumparc(a, s, f); - if (pos == 5) - { - fprintf(f, "\n"); - pos = 1; - } - else - pos++; - return pos; -} - -/* - * dumparc - dump one outarc in readable form, including prefixing tab - */ -static void -dumparc(struct arc * a, - struct state * s, - FILE *f) -{ - struct arc *aa; - struct arcbatch *ab; - - fprintf(f, "\t"); - switch (a->type) - { - case PLAIN: - fprintf(f, "[%ld]", (long) a->co); - break; - case AHEAD: - fprintf(f, ">%ld>", (long) a->co); - break; - case BEHIND: - fprintf(f, "<%ld<", (long) a->co); - break; - case LACON: - fprintf(f, ":%ld:", (long) a->co); - break; - case '^': - case '$': - fprintf(f, "%c%d", a->type, (int) a->co); - break; - case EMPTY: - break; - default: - fprintf(f, "0x%x/0%lo", a->type, (long) a->co); - break; - } - if (a->from != s) - fprintf(f, "?%d?", a->from->no); - for (ab = &a->from->oas; ab != NULL; ab = ab->next) - { - for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) - if (aa == a) - break; /* NOTE BREAK OUT */ - if (aa < &ab->a[ABSIZE]) /* propagate break */ - break; /* NOTE BREAK OUT */ - } - if 
(ab == NULL) - fprintf(f, "?!?"); /* not in allocated space */ - fprintf(f, "->"); - if (a->to == NULL) - { - fprintf(f, "NULL"); - return; - } - fprintf(f, "%d", a->to->no); - for (aa = a->to->ins; aa != NULL; aa = aa->inchain) - if (aa == a) - break; /* NOTE BREAK OUT */ - if (aa == NULL) - fprintf(f, "?!?"); /* missing from in-chain */ -} -#endif /* REG_DEBUG */ - -/* - * dumpcnfa - dump a compacted NFA in human-readable form - */ -#ifdef REG_DEBUG -static void -dumpcnfa(struct cnfa * cnfa, - FILE *f) -{ - int st; - - fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); - if (cnfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]); - if (cnfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]); - if (cnfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]); - if (cnfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]); - if (cnfa->flags & HASLACONS) - fprintf(f, ", haslacons"); - fprintf(f, "\n"); - for (st = 0; st < cnfa->nstates; st++) - dumpcstate(st, cnfa->states[st], cnfa, f); - fflush(f); -} -#endif - -#ifdef REG_DEBUG /* subordinates of dumpcnfa */ - -/* - * dumpcstate - dump a compacted-NFA state in human-readable form - */ -static void -dumpcstate(int st, - struct carc * ca, - struct cnfa * cnfa, - FILE *f) -{ - int i; - int pos; - - fprintf(f, "%d%s", st, (ca[0].co) ? 
":" : "."); - pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) - { - if (ca[i].co < cnfa->ncolors) - fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); - else - fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors, - ca[i].to); - if (pos == 5) - { - fprintf(f, "\n"); - pos = 1; - } - else - pos++; - } - if (i == 1 || pos != 1) - fprintf(f, "\n"); - fflush(f); -} - -#endif /* REG_DEBUG */ diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c index a84b939944..a016af0b77 100644 --- a/src/regex/regcomp.c +++ b/src/regex/regcomp.c @@ -1,2261 +1,1610 @@ -/* - * re_*comp and friends - compile REs - * This file #includes several others (see the bottom). - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regcomp.c,v 1.38 2003/08/08 21:41:56 momjian Exp $ - * - */ - -#include "regguts.h" - -/* - * forward declarations, up here so forward datatypes etc. are defined early - */ -/* === regcomp.c === */ -static void moresubs(struct vars *, int); -static int freev(struct vars *, int); -static void makesearch(struct vars *, struct nfa *); -static struct subre *parse(struct vars *, int, int, struct state *, struct state *); -static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int); -static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); -static void nonword(struct vars *, int, struct state *, struct state *); -static void word(struct vars *, int, struct state *, struct state *); -static int scannum(struct vars *); -static void repeat(struct vars *, struct state *, struct state *, int, int); -static void bracket(struct vars *, struct state *, struct state *); -static void cbracket(struct vars *, struct state *, struct state *); -static void brackpart(struct vars *, struct state *, struct state *); -static chr *scanplain(struct vars *); -static void leaders(struct vars *, struct cvec *); -static void onechr(struct vars *, chr, struct state *, struct state *); -static void dovec(struct vars *, struct cvec *, struct state *, struct state *); -static celt nextleader(struct vars *, chr, chr); -static void wordchrs(struct vars 
*); -static struct subre *subre(struct vars *, int, int, struct state *, struct state *); -static void freesubre(struct vars *, struct subre *); -static void freesrnode(struct vars *, struct subre *); -static void optst(struct vars *, struct subre *); -static int numst(struct subre *, int); -static void markst(struct subre *); -static void cleanst(struct vars *); -static long nfatree(struct vars *, struct subre *, FILE *); -static long nfanode(struct vars *, struct subre *, FILE *); -static int newlacon(struct vars *, struct state *, struct state *, int); -static void freelacons(struct subre *, int); -static void rfree(regex_t *); - -#ifdef REG_DEBUG -static void dump(regex_t *, FILE *); -static void dumpst(struct subre *, FILE *, int); -static void stdump(struct subre *, FILE *, int); -static char *stid(struct subre *, char *, size_t); -#endif -/* === regc_lex.c === */ -static void lexstart(struct vars *); -static void prefixes(struct vars *); -static void lexnest(struct vars *, chr *, chr *); -static void lexword(struct vars *); -static int next(struct vars *); -static int lexescape(struct vars *); -static chr lexdigits(struct vars *, int, int, int); -static int brenext(struct vars *, chr); -static void skip(struct vars *); -static chr newline(void); -static chr chrnamed(struct vars *, chr *, chr *, chr); - -/* === regc_color.c === */ -static void initcm(struct vars *, struct colormap *); -static void freecm(struct colormap *); -static void cmtreefree(struct colormap *, union tree *, int); -static color setcolor(struct colormap *, chr, pcolor); -static color maxcolor(struct colormap *); -static color newcolor(struct colormap *); -static void freecolor(struct colormap *, pcolor); -static color pseudocolor(struct colormap *); -static color subcolor(struct colormap *, chr c); -static color newsub(struct colormap *, pcolor); -static void subrange(struct vars *, chr, chr, struct state *, struct state *); -static void subblock(struct vars *, chr, struct state *, struct 
state *); -static void okcolors(struct nfa *, struct colormap *); -static void colorchain(struct colormap *, struct arc *); -static void uncolorchain(struct colormap *, struct arc *); -static int singleton(struct colormap *, chr c); -static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); -static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); - -#ifdef REG_DEBUG -static void dumpcolors(struct colormap *, FILE *); -static void fillcheck(struct colormap *, union tree *, int, FILE *); -static void dumpchr(chr, FILE *); -#endif -/* === regc_nfa.c === */ -static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); -static void freenfa(struct nfa *); -static struct state *newstate(struct nfa *); -static struct state *newfstate(struct nfa *, int flag); -static void dropstate(struct nfa *, struct state *); -static void freestate(struct nfa *, struct state *); -static void destroystate(struct nfa *, struct state *); -static void newarc(struct nfa *, int, pcolor, struct state *, struct state *); -static struct arc *allocarc(struct nfa *, struct state *); -static void freearc(struct nfa *, struct arc *); -static struct arc *findarc(struct state *, int, pcolor); -static void cparc(struct nfa *, struct arc *, struct state *, struct state *); -static void moveins(struct nfa *, struct state *, struct state *); -static void copyins(struct nfa *, struct state *, struct state *); -static void moveouts(struct nfa *, struct state *, struct state *); -static void copyouts(struct nfa *, struct state *, struct state *); -static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); -static void delsub(struct nfa *, struct state *, struct state *); -static void deltraverse(struct nfa *, struct state *, struct state *); -static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *); -static void 
duptraverse(struct nfa *, struct state *, struct state *); -static void cleartraverse(struct nfa *, struct state *); -static void specialcolors(struct nfa *); -static long optimize(struct nfa *, FILE *); -static void pullback(struct nfa *, FILE *); -static int pull(struct nfa *, struct arc *); -static void pushfwd(struct nfa *, FILE *); -static int push(struct nfa *, struct arc *); - -#define INCOMPATIBLE 1 /* destroys arc */ -#define SATISFIED 2 /* constraint satisfied */ -#define COMPATIBLE 3 /* compatible but not satisfied yet */ -static int combine(struct arc *, struct arc *); -static void fixempties(struct nfa *, FILE *); -static int unempty(struct nfa *, struct arc *); -static void cleanup(struct nfa *); -static void markreachable(struct nfa *, struct state *, struct state *, struct state *); -static void markcanreach(struct nfa *, struct state *, struct state *, struct state *); -static long analyze(struct nfa *); -static void compact(struct nfa *, struct cnfa *); -static void carcsort(struct carc *, struct carc *); -static void freecnfa(struct cnfa *); -static void dumpnfa(struct nfa *, FILE *); - -#ifdef REG_DEBUG -static void dumpstate(struct state *, FILE *); -static void dumparcs(struct state *, FILE *); -static int dumprarcs(struct arc *, struct state *, FILE *, int); -static void dumparc(struct arc *, struct state *, FILE *); -static void dumpcnfa(struct cnfa *, FILE *); -static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); -#endif -/* === regc_cvec.c === */ -static struct cvec *newcvec(int, int, int); -static struct cvec *clearcvec(struct cvec *); -static void addchr(struct cvec *, chr); -static void addrange(struct cvec *, chr, chr); -static void addmcce(struct cvec *, chr *, chr *); -static int haschr(struct cvec *, chr); -static struct cvec *getcvec(struct vars *, int, int, int); -static void freecvec(struct cvec *); - -/* === regc_locale.c === */ -static int wx_isdigit(wx_wchar c); -static int wx_isalpha(wx_wchar c); -static int 
wx_isalnum(wx_wchar c); -static int wx_isupper(wx_wchar c); -static int wx_islower(wx_wchar c); -static int wx_isgraph(wx_wchar c); -static int wx_ispunct(wx_wchar c); -static int wx_isspace(wx_wchar c); -static wx_wchar wx_toupper(wx_wchar c); -static wx_wchar wx_tolower(wx_wchar c); -static int nmcces(struct vars *); -static int nleaders(struct vars *); -static struct cvec *allmcces(struct vars *, struct cvec *); -static celt element(struct vars *, chr *, chr *); -static struct cvec *range(struct vars *, celt, celt, int); -static int before(celt, celt); -static struct cvec *eclass(struct vars *, celt, int); -static struct cvec *cclass(struct vars *, chr *, chr *, int); -static struct cvec *allcases(struct vars *, chr); -static int cmp(const chr *, const chr *, size_t); -static int casecmp(const chr *, const chr *, size_t); - - -/* internal variables, bundled for easy passing around */ -struct vars -{ - regex_t *re; - chr *now; /* scan pointer into string */ - chr *stop; /* end of string */ - chr *savenow; /* saved now and stop for "subroutine - * call" */ - chr *savestop; - int err; /* error code (0 if none) */ - int cflags; /* copy of compile flags */ - int lasttype; /* type of previous token */ - int nexttype; /* type of next token */ - chr nextvalue; /* value (if any) of next token */ - int lexcon; /* lexical context type (see lex.c) */ - int nsubexp; /* subexpression count */ - struct subre **subs; /* subRE pointer vector */ - size_t nsubs; /* length of vector */ - struct subre *sub10[10]; /* initial vector, enough for most */ - struct nfa *nfa; /* the NFA */ - struct colormap *cm; /* character color map */ - color nlcolor; /* color of newline */ - struct state *wordchrs; /* state in nfa holding word-char outarcs */ - struct subre *tree; /* subexpression tree */ - struct subre *treechain; /* all tree nodes allocated */ - struct subre *treefree; /* any free tree nodes */ - int ntree; /* number of tree nodes */ - struct cvec *cv; /* interface cvec */ - struct 
cvec *cv2; /* utility cvec */ - struct cvec *mcces; /* collating-element information */ -#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) - struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ - struct state *mccepend; /* in nfa, end of MCCE prototypes */ - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ -}; - -/* parsing macros; most know that `v' is the struct vars pointer */ -#define NEXT() (next(v)) /* advance by one token */ -#define SEE(t) (v->nexttype == (t)) /* is next token this? */ -#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ - ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return;} /* if error seen, return */ -#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ -#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ -#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, - * error */ -#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ -#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) - -/* token type codes, some also used as NFA arc types */ -#define EMPTY 'n' /* no token present */ -#define EOS 'e' /* end of string */ -#define PLAIN 'p' /* ordinary character */ -#define DIGIT 'd' /* digit (in bound) */ -#define BACKREF 'b' /* back reference */ -#define COLLEL 'I' /* start of [. */ -#define ECLASS 'E' /* start of [= */ -#define CCLASS 'C' /* start of [: */ -#define END 'X' /* end of [. [= [: */ -#define RANGE 'R' /* - within [] which might be range delim. 
*/ -#define LACON 'L' /* lookahead constraint subRE */ -#define AHEAD 'a' /* color-lookahead arc */ -#define BEHIND 'r' /* color-lookbehind arc */ -#define WBDRY 'w' /* word boundary constraint */ -#define NWBDRY 'W' /* non-word-boundary constraint */ -#define SBEGIN 'A' /* beginning of string (even if not BOL) */ -#define SEND 'Z' /* end of string (even if not EOL) */ -#define PREFER 'P' /* length preference */ - -/* is an arc colored, and hence on a color chain? */ -#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ - (a)->type == BEHIND) - - - -/* static function list */ -static struct fns functions = { - rfree, /* regfree insides */ -}; - - - -/* - * regcomp - compile regular expression - */ -int -regcomp(regex_t *re, - const chr *string, - int flags) -{ - - size_t nLen = 0; - chr* s2 = (chr*) string; - - if (string && *string) - { - while(*++s2); - } - - nLen = ((s2 - string) / sizeof(chr)); - - return wx_regcomp(re, string, nLen, flags); -} -int -wx_regcomp(regex_t *re, - const chr *string, - size_t len, - int flags) -{ - struct vars var; - struct vars *v = &var; - struct guts *g; - int i; - size_t j; - -#ifdef REG_DEBUG - FILE *debug = (flags & REG_PROGRESS) ? stdout : (FILE *) NULL; - +#if defined(__MWERKS__) && !defined(__MACH__) +typedef long off_t ; #else - FILE *debug = (FILE *) NULL; +#include +#endif +#include +#include +#include +#include +#include +#include "regex.h" + +#include "utils.h" +#include "regex2.h" + +#include "cclass.h" +#include "cname.h" + +/* + * parse structure, passed up and down to avoid global variables and + * other clumsinesses + */ +struct parse { + char *next; /* next character in RE */ + char *end; /* end of string (-> NUL normally) */ + int error; /* has an error been seen? 
*/ + sop *strip; /* malloced strip */ + sopno ssize; /* malloced strip size (allocated) */ + sopno slen; /* malloced strip length (used) */ + int ncsalloc; /* number of csets allocated */ + struct re_guts *g; +# define NPAREN 10 /* we need to remember () 1-9 for back refs */ + sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ + sopno pend[NPAREN]; /* -> ) ([0] unused) */ +}; + +#include "regcomp.ih" + +static char nuls[10]; /* place to point scanner in event of error */ + +/* + * macros for use with parse structure + * BEWARE: these know that the parse structure is named `p' !!! + */ +#define PEEK() (*p->next) +#define PEEK2() (*(p->next+1)) +#define MORE() (p->next < p->end) +#define MORE2() (p->next+1 < p->end) +#define SEE(c) (MORE() && PEEK() == (c)) +#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) +#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) +#define NEXT() (p->next++) +#define NEXT2() (p->next += 2) +#define NEXTn(n) (p->next += (n)) +#define GETNEXT() (*p->next++) +#define SETERROR(e) seterr(p, (e)) +#define REQUIRE(co, e) ((void)((co) || SETERROR(e))) +#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) +#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) +#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) +#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) +#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) +#define HERE() (p->slen) +#define THERE() (p->slen - 1) +#define THERETHERE() (p->slen - 2) +#define DROP(n) (p->slen -= (n)) + +#ifndef NDEBUG +static int never = 0; /* for use in asserts; shuts lint up */ +#else +#define never 0 /* some s have bugs too */ #endif -#define CNOERR() { if (ISERR()) return freev(v, v->err); } +/* + - regcomp - interface for parser and compilation + = extern int regcomp(regex_t *, const 
char *, int); + = #define REG_BASIC 0000 + = #define REG_EXTENDED 0001 + = #define REG_ICASE 0002 + = #define REG_NOSUB 0004 + = #define REG_NEWLINE 0010 + = #define REG_NOSPEC 0020 + = #define REG_PEND 0040 + = #define REG_DUMP 0200 + */ +int /* 0 success, otherwise REG_something */ +regcomp(preg, pattern, cflags) +regex_t *preg; +const char *pattern; +int cflags; +{ + struct parse pa; + register struct re_guts *g; + register struct parse *p = &pa; + register int i; + register size_t len; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&~REG_DUMP) +#endif - /* sanity checks */ + cflags = GOODFLAGS(cflags); + if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) + return(REG_INVARG); - if (re == NULL || string == NULL) - return REG_INVARG; - if ((flags & REG_QUOTE) && - (flags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))) - return REG_INVARG; - if (!(flags & REG_EXTENDED) && (flags & REG_ADVF)) - return REG_INVARG; + if (cflags®_PEND) { + if (preg->re_endp < pattern) + return(REG_INVARG); + len = preg->re_endp - pattern; + } else + len = strlen((char *)pattern); - /* initial setup (after which freev() is callable) */ - v->re = re; - v->now = (chr *) string; - v->stop = v->now + len; - v->savenow = v->savestop = NULL; - v->err = 0; - v->cflags = flags; - v->nsubexp = 0; - v->subs = v->sub10; - v->nsubs = 10; - for (j = 0; j < v->nsubs; j++) - v->subs[j] = NULL; - v->nfa = NULL; - v->cm = NULL; - v->nlcolor = COLORLESS; - v->wordchrs = NULL; - v->tree = NULL; - v->treechain = NULL; - v->treefree = NULL; - v->cv = NULL; - v->cv2 = NULL; - v->mcces = NULL; - v->lacons = NULL; - v->nlacons = 0; - re->re_magic = REMAGIC; - re->re_info = 0; /* bits get set during parse */ - re->re_csize = sizeof(chr); - re->re_guts = NULL; - re->re_fns = VS(&functions); - - /* more complex setup, malloced things */ - re->re_guts = VS(MALLOC(sizeof(struct guts))); - if (re->re_guts == NULL) - return freev(v, REG_ESPACE); - g = (struct guts *) re->re_guts; - g->tree = 
NULL; - initcm(v, &g->cmap); - v->cm = &g->cmap; - g->lacons = NULL; - g->nlacons = 0; - ZAPCNFA(g->search); - v->nfa = newnfa(v, v->cm, (struct nfa *) NULL); - CNOERR(); - v->cv = newcvec(100, 20, 10); - if (v->cv == NULL) - return freev(v, REG_ESPACE); - i = nmcces(v); - if (i > 0) - { - v->mcces = newcvec(nleaders(v), 0, i); - CNOERR(); - v->mcces = allmcces(v, v->mcces); - leaders(v, v->mcces); - addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */ + /* do the mallocs early so failure handling is easy */ + g = (struct re_guts *)malloc(sizeof(struct re_guts) + + (NC-1)*sizeof(cat_t)); + if (g == NULL) + return(REG_ESPACE); + p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ + p->strip = (sop *)malloc(p->ssize * sizeof(sop)); + p->slen = 0; + if (p->strip == NULL) { + free((char *)g); + return(REG_ESPACE); } - CNOERR(); - /* parsing */ - lexstart(v); /* also handles prefixes */ - if ((v->cflags & REG_NLSTOP) || (v->cflags & REG_NLANCH)) - { - /* assign newline a unique color */ - v->nlcolor = subcolor(v->cm, newline()); - okcolors(v->nfa, v->cm); + /* set things up */ + p->g = g; + p->next = (char *)pattern; /* convenience; we do not modify it */ + p->end = p->next + len; + p->error = 0; + p->ncsalloc = 0; + for (i = 0; i < NPAREN; i++) { + p->pbegin[i] = 0; + p->pend[i] = 0; } - CNOERR(); - v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final); - assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ - CNOERR(); - assert(v->tree != NULL); + g->csetsize = NC; + g->sets = NULL; + g->setbits = NULL; + g->ncsets = 0; + g->cflags = cflags; + g->iflags = 0; + g->nbol = 0; + g->neol = 0; + g->must = NULL; + g->mlen = 0; + g->nsub = 0; + g->ncategories = 1; /* category 0 is "everything else" */ + g->categories = &g->catspace[-(CHAR_MIN)]; + (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); + g->backrefs = 0; - /* finish setup of nfa and its subre tree */ - specialcolors(v->nfa); - CNOERR(); -#ifdef REG_DEBUG - if (debug != NULL) - { - 
fprintf(debug, "\n\n\n========= RAW ==========\n"); - dumpnfa(v->nfa, debug); - dumpst(v->tree, debug, 1); - } -#endif - optst(v, v->tree); - v->ntree = numst(v->tree, 1); - markst(v->tree); - cleanst(v); -#ifdef REG_DEBUG - if (debug != NULL) - { - fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); - dumpst(v->tree, debug, 1); - } + /* do it */ + EMIT(OEND, 0); + g->firststate = THERE(); + if (cflags®_EXTENDED) + p_ere(p, OUT); + else if (cflags®_NOSPEC) + p_str(p); + else + p_bre(p, OUT, OUT); + EMIT(OEND, 0); + g->laststate = THERE(); + + /* tidy up loose ends and fill things in */ + categorize(p, g); + stripsnug(p, g); + findmust(p, g); + g->nplus = pluscount(p, g); + g->magic = MAGIC2; + preg->re_nsub = g->nsub; + preg->re_g = g; + preg->re_magic = MAGIC1; +#ifndef REDEBUG + /* not debugging, so can't rely on the assert() in regexec() */ + if (g->iflags&BAD) + SETERROR(REG_ASSERT); #endif - /* build compacted NFAs for tree and lacons */ - re->re_info |= nfatree(v, v->tree, debug); - CNOERR(); - assert(v->nlacons == 0 || v->lacons != NULL); - for (i = 1; i < v->nlacons; i++) - { -#ifdef REG_DEBUG - if (debug != NULL) - fprintf(debug, "\n\n\n========= LA%d ==========\n", i); -#endif - nfanode(v, &v->lacons[i], debug); - } - CNOERR(); - if (v->tree->flags & SHORTER) - NOTE(REG_USHORTEST); - - /* build compacted NFAs for tree, lacons, fast search */ -#ifdef REG_DEBUG - if (debug != NULL) - fprintf(debug, "\n\n\n========= SEARCH ==========\n"); -#endif - /* can sacrifice main NFA now, so use it as work area */ - (DISCARD) optimize(v->nfa, debug); - CNOERR(); - makesearch(v, v->nfa); - CNOERR(); - compact(v->nfa, &g->search); - CNOERR(); - - /* looks okay, package it up */ - re->re_nsub = v->nsubexp; - v->re = NULL; /* freev no longer frees re */ - g->magic = GUTSMAGIC; - g->cflags = v->cflags; - g->info = re->re_info; - g->nsub = re->re_nsub; - g->tree = v->tree; - v->tree = NULL; - g->ntree = v->ntree; - g->compare = (v->cflags & REG_ICASE) ? 
casecmp : cmp; - g->lacons = v->lacons; - v->lacons = NULL; - g->nlacons = v->nlacons; - -#ifdef REG_DEBUG - if (flags & REG_DUMP) - dump(re, stdout); -#endif - - assert(v->err == 0); - return freev(v, 0); + /* win or lose, we're done */ + if (p->error != 0) /* lose */ + regfree(preg); + return(p->error); } /* - * moresubs - enlarge subRE vector + - p_ere - ERE parser top level, concatenation and alternation + == static void p_ere(register struct parse *p, int stop); */ static void -moresubs(struct vars * v, - int wanted) /* want enough room for this one */ +p_ere(p, stop) +register struct parse *p; +int stop; /* character this ERE should end at */ { - struct subre **p; - size_t n; + register char c; + register sopno prevback; + register sopno prevfwd; + register sopno conc; + register int first = 1; /* is this the first alternative? */ - assert(wanted > 0 && (size_t) wanted >= v->nsubs); - n = (size_t) wanted *3 / 2 + 1; + for (;;) { + /* do a bunch of concatenated expressions */ + conc = HERE(); + while (MORE() && (c = PEEK()) != '|' && c != stop) + p_ere_exp(p); + REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ - if (v->subs == v->sub10) - { - p = (struct subre **) MALLOC(n * sizeof(struct subre *)); - if (p != NULL) - memcpy(VS(p), VS(v->subs), - v->nsubs * sizeof(struct subre *)); + if (!EAT('|')) + break; /* NOTE BREAK OUT */ + + if (first) { + INSERT(OCH_, conc); /* offset is wrong */ + prevfwd = conc; + prevback = conc; + first = 0; } - else - p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *)); - if (p == NULL) - { - ERR(REG_ESPACE); - return; + ASTERN(OOR1, prevback); + prevback = THERE(); + AHEAD(prevfwd); /* fix previous offset */ + prevfwd = HERE(); + EMIT(OOR2, 0); /* offset is very wrong */ } - v->subs = p; - for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) - *p = NULL; - assert(v->nsubs == n); - assert((size_t) wanted < v->nsubs); + + if (!first) { /* tail-end fixups */ + AHEAD(prevfwd); + ASTERN(O_CH, prevback); 
+ } + + assert(!MORE() || SEE(stop)); } /* - * freev - free vars struct's substructures where necessary + - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op + == static void p_ere_exp(register struct parse *p); + */ +static void +p_ere_exp(p) +register struct parse *p; +{ + register char c; + register sopno pos; + register int count; + register int count2; + register sopno subno; + int wascaret = 0; + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + + pos = HERE(); + switch (c) { + case '(': + REQUIRE(MORE(), REG_EPAREN); + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + if (!SEE(')')) + p_ere(p, ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + MUSTEAT(')', REG_EPAREN); + break; +#ifndef POSIX_MISTAKE + case ')': /* happens only if no current unmatched ( */ + /* + * You may ask, why the ifndef? Because I didn't notice + * this until slightly too late for 1003.2, and none of the + * other 1003.2 regular-expression reviewers noticed it at + * all. So an unmatched ) is legal POSIX, at least until + * we can get it fixed. 
+ */ + SETERROR(REG_EPAREN); + break; +#endif + case '^': + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + wascaret = 1; + break; + case '$': + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + break; + case '|': + SETERROR(REG_EMPTY); + break; + case '*': + case '+': + case '?': + SETERROR(REG_BADRPT); + break; + case '.': + if (p->g->cflags®_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case '\\': + REQUIRE(MORE(), REG_EESCAPE); + c = GETNEXT(); + ordinary(p, c); + break; + case '{': /* okay as ordinary except if digit follows */ + REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); + /* FALLTHROUGH */ + default: + ordinary(p, c); + break; + } + + if (!MORE()) + return; + c = PEEK(); + /* we call { a repetition if followed by a digit */ + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit(PEEK2())) )) + return; /* no repetition, we're done */ + NEXT(); + + REQUIRE(!wascaret, REG_BADRPT); + switch (c) { + case '*': /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + break; + case '+': + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + break; + case '?': + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, pos); /* offset slightly wrong */ + ASTERN(OOR1, pos); /* this one's right */ + AHEAD(pos); /* fix the OCH_ */ + EMIT(OOR2, 0); /* offset very wrong... 
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + break; + case '{': + count = p_count(p); + if (EAT(',')) { + if (isdigit(PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EAT('}')) { /* error heuristics */ + while (MORE() && PEEK() != '}') + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + break; + } + + if (!MORE()) + return; + c = PEEK(); + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit(PEEK2())) ) ) + return; + SETERROR(REG_BADRPT); +} + +/* + - p_str - string (no metacharacters) "parser" + == static void p_str(register struct parse *p); + */ +static void +p_str(p) +register struct parse *p; +{ + REQUIRE(MORE(), REG_EMPTY); + while (MORE()) + ordinary(p, GETNEXT()); +} + +/* + - p_bre - BRE parser top level, anchoring and concatenation + == static void p_bre(register struct parse *p, register int end1, \ + == register int end2); + * Giving end1 as OUT essentially eliminates the end1/end2 check. * - * Optionally does error-number setting, and always returns error code - * (if any), to make error-handling code terser. + * This implementation is a bit of a kludge, in that a trailing $ is first + * taken as an ordinary character and then revised to be an anchor. The + * only undesirable side effect is that '$' gets included as a character + * category in such cases. This is fairly harmless; not worth fixing. + * The amount of lookahead needed to avoid this kludge is excessive. + */ +static void +p_bre(p, end1, end2) +register struct parse *p; +register int end1; /* first terminating character */ +register int end2; /* second terminating character */ +{ + register sopno start = HERE(); + register int first = 1; /* first subexpression? 
*/ + register int wasdollar = 0; + + if (EAT('^')) { + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + } + while (MORE() && !SEETWO(end1, end2)) { + wasdollar = p_simp_re(p, first); + first = 0; + } + if (wasdollar) { /* oops, that was a trailing anchor */ + DROP(1); + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + } + + REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ +} + +/* + - p_simp_re - parse a simple RE, an atom possibly followed by a repetition + == static int p_simp_re(register struct parse *p, int starordinary); + */ +static int /* was the simple RE an unbackslashed $? */ +p_simp_re(p, starordinary) +register struct parse *p; +int starordinary; /* is a leading * an ordinary character? */ +{ + register int c; + register int count; + register int count2; + register sopno pos; + register int i; + register sopno subno; +# define BACKSL (1<g->cflags®_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case BACKSL|'{': + SETERROR(REG_BADRPT); + break; + case BACKSL|'(': + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + /* the MORE here is an error heuristic */ + if (MORE() && !SEETWO('\\', ')')) + p_bre(p, '\\', ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + REQUIRE(EATTWO('\\', ')'), REG_EPAREN); + break; + case BACKSL|')': /* should not get here -- must be user */ + case BACKSL|'}': + SETERROR(REG_EPAREN); + break; + case BACKSL|'1': + case BACKSL|'2': + case BACKSL|'3': + case BACKSL|'4': + case BACKSL|'5': + case BACKSL|'6': + case BACKSL|'7': + case BACKSL|'8': + case BACKSL|'9': + i = (c&~BACKSL) - '0'; + assert(i < NPAREN); + if (p->pend[i] != 0) { + assert(i <= p->g->nsub); + EMIT(OBACK_, i); + assert(p->pbegin[i] != 0); + assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); + assert(OP(p->strip[p->pend[i]]) == ORPAREN); + (void) dupl(p, 
p->pbegin[i]+1, p->pend[i]); + EMIT(O_BACK, i); + } else + SETERROR(REG_ESUBREG); + p->g->backrefs = 1; + break; + case '*': + REQUIRE(starordinary, REG_BADRPT); + /* FALLTHROUGH */ + default: + ordinary(p, (char)c); /* takes off BACKSL, if any */ + break; + } + + if (EAT('*')) { /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + } else if (EATTWO('\\', '{')) { + count = p_count(p); + if (EAT(',')) { + if (MORE() && isdigit(PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EATTWO('\\', '}')) { /* error heuristics */ + while (MORE() && !SEETWO('\\', '}')) + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ + return(1); + + return(0); +} + +/* + - p_count - parse a repetition count + == static int p_count(register struct parse *p); + */ +static int /* the value */ +p_count(p) +register struct parse *p; +{ + register int count = 0; + register int ndigits = 0; + + while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { + count = count*10 + (GETNEXT() - '0'); + ndigits++; + } + + REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); + return(count); +} + +/* + - p_bracket - parse a bracketed character list + == static void p_bracket(register struct parse *p); + * + * Note a significant property of this code: if the allocset() did SETERROR, + * no set operations are done. 
+ */ +static void +p_bracket(p) +register struct parse *p; +{ + register cset *cs = allocset(p); + register int invert = 0; + + /* Dept of Truly Sickening Special-Case Kludges */ + if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { + EMIT(OBOW, 0); + NEXTn(6); + return; + } + if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { + EMIT(OEOW, 0); + NEXTn(6); + return; + } + + if (EAT('^')) + invert++; /* make note to invert set at end */ + if (EAT(']')) + CHadd(cs, ']'); + else if (EAT('-')) + CHadd(cs, '-'); + while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) + p_b_term(p, cs); + if (EAT('-')) + CHadd(cs, '-'); + MUSTEAT(']', REG_EBRACK); + + if (p->error != 0) /* don't mess things up further */ + return; + + if (p->g->cflags®_ICASE) { + register int i; + register int ci; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i) && isalpha(i)) { + ci = othercase(i); + if (ci != i) + CHadd(cs, ci); + } + if (cs->multis != NULL) + mccase(p, cs); + } + if (invert) { + register int i; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i)) + CHsub(cs, i); + else + CHadd(cs, i); + if (p->g->cflags®_NEWLINE) + CHsub(cs, '\n'); + if (cs->multis != NULL) + mcinvert(p, cs); + } + + assert(cs->multis == NULL); /* xxx */ + + if (nch(p, cs) == 1) { /* optimize singleton sets */ + ordinary(p, firstch(p, cs)); + freeset(p, cs); + } else + EMIT(OANYOF, freezeset(p, cs)); +} + +/* + - p_b_term - parse one term of a bracketed character list + == static void p_b_term(register struct parse *p, register cset *cs); + */ +static void +p_b_term(p, cs) +register struct parse *p; +register cset *cs; +{ + register char c; + register char start, finish; + register int i; + + /* classify what we've got */ + switch ((MORE()) ? PEEK() : '\0') { + case '[': + c = (MORE2()) ? 
PEEK2() : '\0'; + break; + case '-': + SETERROR(REG_ERANGE); + return; /* NOTE RETURN */ + break; + default: + c = '\0'; + break; + } + + switch (c) { + case ':': /* character class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECTYPE); + p_b_cclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO(':', ']'), REG_ECTYPE); + break; + case '=': /* equivalence class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECOLLATE); + p_b_eclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); + break; + default: /* symbol, ordinary character, or range */ +/* xxx revision needed for multichar stuff */ + start = p_b_symbol(p); + if (SEE('-') && MORE2() && PEEK2() != ']') { + /* range */ + NEXT(); + if (EAT('-')) + finish = '-'; + else + finish = p_b_symbol(p); + } else + finish = start; +/* xxx what about signed chars here... */ + REQUIRE(start <= finish, REG_ERANGE); + for (i = start; i <= finish; i++) + CHadd(cs, i); + break; + } +} + +/* + - p_b_cclass - parse a character-class name and deal with it + == static void p_b_cclass(register struct parse *p, register cset *cs); + */ +static void +p_b_cclass(p, cs) +register struct parse *p; +register cset *cs; +{ + register char *sp = p->next; + register struct cclass *cp; + register size_t len; + register char *u; + register char c; + + while (MORE() && isalpha(PEEK())) + NEXT(); + len = p->next - sp; + for (cp = cclasses; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + break; + if (cp->name == NULL) { + /* oops, didn't find it */ + SETERROR(REG_ECTYPE); + return; + } + + u = cp->chars; + while ((c = *u++) != '\0') + CHadd(cs, c); + for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) + MCadd(p, cs, u); +} + +/* + - p_b_eclass - parse an equivalence-class name and deal with it + == static void p_b_eclass(register struct parse *p, register cset *cs); + * + 
* This implementation is incomplete. xxx + */ +static void +p_b_eclass(p, cs) +register struct parse *p; +register cset *cs; +{ + register char c; + + c = p_b_coll_elem(p, '='); + CHadd(cs, c); +} + +/* + - p_b_symbol - parse a character or [..]ed multicharacter collating symbol + == static char p_b_symbol(register struct parse *p); + */ +static char /* value of symbol */ +p_b_symbol(p) +register struct parse *p; +{ + register char value; + + REQUIRE(MORE(), REG_EBRACK); + if (!EATTWO('[', '.')) + return(GETNEXT()); + + /* collating symbol */ + value = p_b_coll_elem(p, '.'); + REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); + return(value); +} + +/* + - p_b_coll_elem - parse a collating-element name and look it up + == static char p_b_coll_elem(register struct parse *p, int endc); + */ +static char /* value of collating element */ +p_b_coll_elem(p, endc) +register struct parse *p; +int endc; /* name ended by endc,']' */ +{ + register char *sp = p->next; + register struct cname *cp; + register int len; + + while (MORE() && !SEETWO(endc, ']')) + NEXT(); + if (!MORE()) { + SETERROR(REG_EBRACK); + return(0); + } + len = p->next - sp; + for (cp = cnames; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + return(cp->code); /* known name */ + if (len == 1) + return(*sp); /* single character */ + SETERROR(REG_ECOLLATE); /* neither */ + return(0); +} + +/* + - othercase - return the case counterpart of an alphabetic + == static char othercase(int ch); + */ +static char /* if no counterpart, return ch */ +othercase(ch) +int ch; +{ + assert(isalpha(ch)); + if (isupper(ch)) + return(tolower(ch)); + else if (islower(ch)) + return(toupper(ch)); + else /* peculiar, but could happen */ + return(ch); +} + +/* + - bothcases - emit a dualcase version of a two-case character + == static void bothcases(register struct parse *p, int ch); + * + * Boy, is this implementation ever a kludge... 
+ */ +static void +bothcases(p, ch) +register struct parse *p; +int ch; +{ + register char *oldnext = p->next; + register char *oldend = p->end; + char bracket[3]; + + assert(othercase(ch) != ch); /* p_bracket() would recurse */ + p->next = bracket; + p->end = bracket+2; + bracket[0] = ch; + bracket[1] = ']'; + bracket[2] = '\0'; + p_bracket(p); + assert(p->next == bracket+2); + p->next = oldnext; + p->end = oldend; +} + +/* + - ordinary - emit an ordinary character + == static void ordinary(register struct parse *p, register int ch); + */ +static void +ordinary(p, ch) +register struct parse *p; +register int ch; +{ + register cat_t *cap = p->g->categories; + + if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) + bothcases(p, ch); + else { + EMIT(OCHAR, (unsigned char)ch); + if (cap[ch] == 0) + cap[ch] = p->g->ncategories++; + } +} + +/* + - nonnewline - emit REG_NEWLINE version of OANY + == static void nonnewline(register struct parse *p); + * + * Boy, is this implementation ever a kludge... + */ +static void +nonnewline(p) +register struct parse *p; +{ + register char *oldnext = p->next; + register char *oldend = p->end; + char bracket[4]; + + p->next = bracket; + p->end = bracket+3; + bracket[0] = '^'; + bracket[1] = '\n'; + bracket[2] = ']'; + bracket[3] = '\0'; + p_bracket(p); + assert(p->next == bracket+3); + p->next = oldnext; + p->end = oldend; +} + +/* + - repeat - generate code for a bounded repetition, recursively if needed + == static void repeat(register struct parse *p, sopno start, int from, int to); + */ +static void +repeat(p, start, from, to) +register struct parse *p; +sopno start; /* operand from here to end of strip */ +int from; /* repeated from this number */ +int to; /* to this number of times (maybe INFINITY) */ +{ + register sopno finish = HERE(); +# define N 2 +# define INF 3 +# define REP(f, t) ((f)*8 + (t)) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? 
INF : N) + register sopno copy; + + if (p->error != 0) /* head off possible runaway recursion */ + return; + + assert(from <= to); + + switch (REP(MAP(from), MAP(to))) { + case REP(0, 0): /* must be user doing this */ + DROP(finish-start); /* drop the operand */ + break; + case REP(0, 1): /* as x{1,1}? */ + case REP(0, N): /* as x{1,n}? */ + case REP(0, INF): /* as x{1,}? */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); /* offset is wrong... */ + repeat(p, start+1, 1, to); + ASTERN(OOR1, start); + AHEAD(start); /* ... fix it */ + EMIT(OOR2, 0); + AHEAD(THERE()); + ASTERN(O_CH, THERETHERE()); + break; + case REP(1, 1): /* trivial case */ + /* done */ + break; + case REP(1, N): /* as x?x{1,n-1} */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); + ASTERN(OOR1, start); + AHEAD(start); + EMIT(OOR2, 0); /* offset very wrong... */ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + copy = dupl(p, start+1, finish+1); + assert(copy == finish+4); + repeat(p, copy, 1, to-1); + break; + case REP(1, INF): /* as x+ */ + INSERT(OPLUS_, start); + ASTERN(O_PLUS, start); + break; + case REP(N, N): /* as xx{m-1,n-1} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to-1); + break; + case REP(N, INF): /* as xx{n-1,INF} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to); + break; + default: /* "can't happen" */ + SETERROR(REG_ASSERT); /* just in case */ + break; + } +} + +/* + - seterr - set an error condition + == static int seterr(register struct parse *p, int e); + */ +static int /* useless but makes type checking happy */ +seterr(p, e) +register struct parse *p; +int e; +{ + if (p->error == 0) /* keep earliest error condition */ + p->error = e; + p->next = nuls; /* try to bring things to a halt */ + p->end = nuls; + return(0); /* make the return value well-defined */ +} + +/* + - allocset - allocate a set of characters for [] + == static cset *allocset(register struct 
parse *p); + */ +static cset * +allocset(p) +register struct parse *p; +{ + register int no = p->g->ncsets++; + register size_t nc; + register size_t nbytes; + register cset *cs; + register size_t css = (size_t)p->g->csetsize; + register int i; + + if (no >= p->ncsalloc) { /* need another column of space */ + p->ncsalloc += CHAR_BIT; + nc = p->ncsalloc; + assert(nc % CHAR_BIT == 0); + nbytes = nc / CHAR_BIT * css; + if (p->g->sets == NULL) + p->g->sets = (cset *)malloc(nc * sizeof(cset)); + else + p->g->sets = (cset *)realloc((char *)p->g->sets, + nc * sizeof(cset)); + if (p->g->setbits == NULL) + p->g->setbits = (uch *)malloc(nbytes); + else { + p->g->setbits = (uch *)realloc((char *)p->g->setbits, + nbytes); + /* xxx this isn't right if setbits is now NULL */ + for (i = 0; i < no; i++) + p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); + } + if (p->g->sets != NULL && p->g->setbits != NULL) + (void) memset((char *)p->g->setbits + (nbytes - css), + 0, css); + else { + no = 0; + SETERROR(REG_ESPACE); + /* caller's responsibility not to do set ops */ + } + } + + assert(p->g->sets != NULL); /* xxx */ + cs = &p->g->sets[no]; + cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); + cs->mask = 1 << ((no) % CHAR_BIT); + cs->hash = 0; + cs->smultis = 0; + cs->multis = NULL; + + return(cs); +} + +/* + - freeset - free a now-unused set + == static void freeset(register struct parse *p, register cset *cs); + */ +static void +freeset(p, cs) +register struct parse *p; +register cset *cs; +{ + register size_t i; + register cset *top = &p->g->sets[p->g->ncsets]; + register size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + CHsub(cs, i); + if (cs == top-1) /* recover only the easy case */ + p->g->ncsets--; +} + +/* + - freezeset - final processing on a set of characters + == static int freezeset(register struct parse *p, register cset *cs); + * + * The main task here is merging identical sets. 
This is usually a waste + * of time (although the hash code minimizes the overhead), but can win + * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash + * is done using addition rather than xor -- all ASCII [aA] sets xor to + * the same value! + */ +static int /* set number */ +freezeset(p, cs) +register struct parse *p; +register cset *cs; +{ + register uch h = cs->hash; + register size_t i; + register cset *top = &p->g->sets[p->g->ncsets]; + register cset *cs2; + register size_t css = (size_t)p->g->csetsize; + + /* look for an earlier one which is the same */ + for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) + if (cs2->hash == h && cs2 != cs) { + /* maybe */ + for (i = 0; i < css; i++) + if (!!CHIN(cs2, i) != !!CHIN(cs, i)) + break; /* no */ + if (i == css) + break; /* yes */ + } + + if (cs2 < top) { /* found one */ + freeset(p, cs); + cs = cs2; + } + + return((int)(cs - p->g->sets)); +} + +/* + - firstch - return first character in a set (which must have at least one) + == static int firstch(register struct parse *p, register cset *cs); + */ +static int /* character; there is no "none" value */ +firstch(p, cs) +register struct parse *p; +register cset *cs; +{ + register size_t i; + register size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + return((char)i); + assert(never); + return(0); /* arbitrary */ +} + +/* + - nch - number of characters in a set + == static int nch(register struct parse *p, register cset *cs); */ static int -freev(struct vars * v, - int err) +nch(p, cs) +register struct parse *p; +register cset *cs; { - if (v->re != NULL) - rfree(v->re); - if (v->subs != v->sub10) - FREE(v->subs); - if (v->nfa != NULL) - freenfa(v->nfa); - if (v->tree != NULL) - freesubre(v, v->tree); - if (v->treechain != NULL) - cleanst(v); - if (v->cv != NULL) - freecvec(v->cv); - if (v->cv2 != NULL) - freecvec(v->cv2); - if (v->mcces != NULL) - freecvec(v->mcces); - if (v->lacons != NULL) - freelacons(v->lacons, 
v->nlacons); - ERR(err); /* nop if err==0 */ + register size_t i; + register size_t css = (size_t)p->g->csetsize; + register int n = 0; - return v->err; + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + n++; + return(n); } /* - * makesearch - turn an NFA into a search NFA (implicit prepend of .*?) - * NFA must have been optimize()d already. + - mcadd - add a collating element to a cset + == static void mcadd(register struct parse *p, register cset *cs, \ + == register char *cp); */ static void -makesearch(struct vars * v, - struct nfa * nfa) +mcadd(p, cs, cp) +register struct parse *p; +register cset *cs; +register char *cp; { - struct arc *a; - struct arc *b; - struct state *pre = nfa->pre; - struct state *s; - struct state *s2; - struct state *slist; + register size_t oldend = cs->smultis; - /* no loops are needed if it's anchored */ - for (a = pre->outs; a != NULL; a = a->outchain) - { - assert(a->type == PLAIN); - if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) - break; - } - if (a != NULL) - { - /* add implicit .* in front */ - rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); - - /* and ^* and \A* too -- not always necessary, but harmless */ - newarc(nfa, PLAIN, nfa->bos[0], pre, pre); - newarc(nfa, PLAIN, nfa->bos[1], pre, pre); + cs->smultis += strlen(cp) + 1; + if (cs->multis == NULL) + cs->multis = malloc(cs->smultis); + else + cs->multis = realloc(cs->multis, cs->smultis); + if (cs->multis == NULL) { + SETERROR(REG_ESPACE); + return; } - /* - * Now here's the subtle part. Because many REs have no lookback - * constraints, often knowing when you were in the pre state tells you - * little; it's the next state(s) that are informative. But some of - * them may have other inarcs, i.e. it may be possible to make actual - * progress and then return to one of them. We must de-optimize such - * cases, splitting each such state into progress and no-progress - * states. 
+ (void) strcpy(cs->multis + oldend - 1, cp); + cs->multis[cs->smultis - 1] = '\0'; +} + +/* these functions don't seem to be used (yet?), suppress warnings */ +#if 0 +/* + - mcsub - subtract a collating element from a cset + == static void mcsub(register cset *cs, register char *cp); */ +static void +mcsub(cs, cp) +register cset *cs; +register char *cp; +{ + register char *fp = mcfind(cs, cp); + register size_t len = strlen(fp); - /* first, make a list of the states */ - slist = NULL; - for (a = pre->outs; a != NULL; a = a->outchain) - { - s = a->to; - for (b = s->ins; b != NULL; b = b->inchain) - if (b->from != pre) - break; - if (b != NULL) - { /* must be split */ - s->tmp = slist; - slist = s; - } - } + assert(fp != NULL); + (void) memmove(fp, fp + len + 1, + cs->smultis - (fp + len + 1 - cs->multis)); + cs->smultis -= len; - /* do the splits */ - for (s = slist; s != NULL; s = s2) - { - s2 = newstate(nfa); - copyouts(nfa, s, s2); - for (a = s->ins; a != NULL; a = b) - { - b = a->inchain; - if (a->from != pre) - { - cparc(nfa, a, a->from, s2); - freearc(nfa, a); - } - } - s2 = s->tmp; - s->tmp = NULL; /* clean up while we're at it */ + if (cs->smultis == 0) { + free(cs->multis); + cs->multis = NULL; + return; } + + cs->multis = realloc(cs->multis, cs->smultis); + assert(cs->multis != NULL); } /* - * parse - parse an RE + - mcin - is a collating element in a cset? 
+ == static int mcin(register cset *cs, register char *cp); + */ +static int +mcin(cs, cp) +register cset *cs; +register char *cp; +{ + return(mcfind(cs, cp) != NULL); +} + +/* + - mcfind - find a collating element in a cset + == static char *mcfind(register cset *cs, register char *cp); + */ +static char * +mcfind(cs, cp) +register cset *cs; +register char *cp; +{ + register char *p; + + if (cs->multis == NULL) + return(NULL); + for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) + if (strcmp(cp, p) == 0) + return(p); + return(NULL); +} +#endif /* 0 */ + +/* + - mcinvert - invert the list of collating elements in a cset + == static void mcinvert(register struct parse *p, register cset *cs); * - * This is actually just the top level, which parses a bunch of branches - * tied together with '|'. They appear in the tree as the left children - * of a chain of '|' subres. + * This would have to know the set of possibilities. Implementation + * is deferred. */ -static struct subre * -parse(struct vars * v, - int stopper, /* EOS or ')' */ - int type, /* LACON (lookahead subRE) or PLAIN */ - struct state * init, /* initial state */ - struct state * final) /* final state */ +static void +mcinvert(p, cs) +register struct parse *p; +register cset *cs; { - struct state *left; /* scaffolding for branch */ - struct state *right; - struct subre *branches; /* top level */ - struct subre *branch; /* current branch */ - struct subre *t; /* temporary */ - int firstbranch; /* is this the first branch? 
*/ - - assert(stopper == ')' || stopper == EOS); - - branches = subre(v, '|', LONGER, init, final); - NOERRN(); - branch = branches; - firstbranch = 1; - do - { /* a branch */ - if (!firstbranch) - { - /* need a place to hang it */ - branch->right = subre(v, '|', LONGER, init, final); - NOERRN(); - branch = branch->right; - } - firstbranch = 0; - left = newstate(v->nfa); - right = newstate(v->nfa); - NOERRN(); - EMPTYARC(init, left); - EMPTYARC(right, final); - NOERRN(); - branch->left = parsebranch(v, stopper, type, left, right, 0); - NOERRN(); - branch->flags |= UP(branch->flags | branch->left->flags); - if ((branch->flags & ~branches->flags) != 0) /* new flags */ - for (t = branches; t != branch; t = t->right) - t->flags |= branch->flags; - } while (EAT('|')); - assert(SEE(stopper) || SEE(EOS)); - - if (!SEE(stopper)) - { - assert(stopper == ')' && SEE(EOS)); - ERR(REG_EPAREN); - } - - /* optimize out simple cases */ - if (branch == branches) - { /* only one branch */ - assert(branch->right == NULL); - t = branch->left; - branch->left = NULL; - freesubre(v, branches); - branches = t; - } - else if (!MESSY(branches->flags)) - { /* no interesting innards */ - freesubre(v, branches->left); - branches->left = NULL; - freesubre(v, branches->right); - branches->right = NULL; - branches->op = '='; - } - - return branches; + assert(cs->multis == NULL); /* xxx */ } /* - * parsebranch - parse one branch of an RE + - mccase - add case counterparts of the list of collating elements in a cset + == static void mccase(register struct parse *p, register cset *cs); * - * This mostly manages concatenation, working closely with parseqatom(). - * Concatenated things are bundled up as much as possible, with separate - * ',' nodes introduced only when necessary due to substructure. + * This would have to know the set of possibilities. Implementation + * is deferred. 
*/ -static struct subre * -parsebranch(struct vars * v, - int stopper, /* EOS or ')' */ - int type, /* LACON (lookahead subRE) or PLAIN */ - struct state * left, /* leftmost state */ - struct state * right, /* rightmost state */ - int partial) /* is this only part of a branch? */ +static void +mccase(p, cs) +register struct parse *p; +register cset *cs; { - struct state *lp; /* left end of current construct */ - int seencontent; /* is there anything in this branch yet? */ - struct subre *t; - - lp = left; - seencontent = 0; - t = subre(v, '=', 0, left, right); /* op '=' is tentative */ - NOERRN(); - while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) - { - if (seencontent) - { /* implicit concat operator */ - lp = newstate(v->nfa); - NOERRN(); - moveins(v->nfa, right, lp); - } - seencontent = 1; - - /* NB, recursion in parseqatom() may swallow rest of branch */ - parseqatom(v, stopper, type, lp, right, t); - } - - if (!seencontent) - { /* empty branch */ - if (!partial) - NOTE(REG_UUNSPEC); - assert(lp == left); - EMPTYARC(left, right); - } - - return t; + assert(cs->multis == NULL); /* xxx */ } /* - * parseqatom - parse one quantified atom or constraint of an RE + - isinsets - is this character in any sets? + == static int isinsets(register struct re_guts *g, int c); + */ +static int /* predicate */ +isinsets(g, c) +register struct re_guts *g; +int c; +{ + register uch *col; + register int i; + register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + register unsigned uc = (unsigned char)c; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc] != 0) + return(1); + return(0); +} + +/* + - samesets - are these two characters in exactly the same sets? 
+ == static int samesets(register struct re_guts *g, int c1, int c2); + */ +static int /* predicate */ +samesets(g, c1, c2) +register struct re_guts *g; +int c1; +int c2; +{ + register uch *col; + register int i; + register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + register unsigned uc1 = (unsigned char)c1; + register unsigned uc2 = (unsigned char)c2; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc1] != col[uc2]) + return(0); + return(1); +} + +/* + - categorize - sort out character categories + == static void categorize(struct parse *p, register struct re_guts *g); + */ +static void +categorize(p, g) +struct parse *p; +register struct re_guts *g; +{ + register cat_t *cats = g->categories; + register int c; + register int c2; + register cat_t cat; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (cats[c] == 0 && isinsets(g, c)) { + cat = g->ncategories++; + cats[c] = cat; + for (c2 = c+1; c2 <= CHAR_MAX; c2++) + if (cats[c2] == 0 && samesets(g, c, c2)) + cats[c2] = cat; + } +} + +/* + - dupl - emit a duplicate of a bunch of sops + == static sopno dupl(register struct parse *p, sopno start, sopno finish); + */ +static sopno /* start of duplicate */ +dupl(p, start, finish) +register struct parse *p; +sopno start; /* from here */ +sopno finish; /* to this less one */ +{ + register sopno ret = HERE(); + register sopno len = finish - start; + + assert(finish >= start); + if (len == 0) + return(ret); + enlarge(p, p->ssize + len); /* this many unexpected additions */ + assert(p->ssize >= p->slen + len); + (void) memcpy((char *)(p->strip + p->slen), + (char *)(p->strip + start), (size_t)len*sizeof(sop)); + p->slen += len; + return(ret); +} + +/* + - doemit - emit a strip operator + == static void doemit(register struct parse *p, sop op, size_t opnd); * - * The bookkeeping near the end cooperates very closely with parsebranch(); - * in particular, it 
contains a recursion that can involve parsing the rest - * of the branch, making this function's name somewhat inaccurate. + * It might seem better to implement this as a macro with a function as + * hard-case backup, but it's just too big and messy unless there are + * some changes to the data structures. Maybe later. */ static void -parseqatom(struct vars * v, - int stopper, /* EOS or ')' */ - int type, /* LACON (lookahead subRE) or PLAIN */ - struct state * lp, /* left state to hang it on */ - struct state * rp, /* right state to hang it on */ - struct subre * top) /* subtree top */ +doemit(p, op, opnd) +register struct parse *p; +sop op; +size_t opnd; { - struct state *s; /* temporaries for new states */ - struct state *s2; - -#define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) - int m, - n; - struct subre *atom; /* atom's subtree */ - struct subre *t; - int cap; /* capturing parens? */ - int pos; /* positive lookahead? */ - int subno; /* capturing-parens or backref number */ - int atomtype; - int qprefer; /* quantifier short/long preference */ - int f; - struct subre **atomp; /* where the pointer to atom is */ - - /* initial bookkeeping */ - atom = NULL; - assert(lp->nouts == 0); /* must string new code */ - assert(rp->nins == 0); /* between lp and rp */ - subno = 0; /* just to shut lint up */ - - /* an atom or constraint... 
*/ - atomtype = v->nexttype; - switch (atomtype) - { - /* first, constraints, which end by returning */ - case '^': - ARCV('^', 1); - if (v->cflags & REG_NLANCH) - ARCV(BEHIND, v->nlcolor); - NEXT(); - return; - break; - case '$': - ARCV('$', 1); - if (v->cflags & REG_NLANCH) - ARCV(AHEAD, v->nlcolor); - NEXT(); - return; - break; - case SBEGIN: - ARCV('^', 1); /* BOL */ - ARCV('^', 0); /* or BOS */ - NEXT(); - return; - break; - case SEND: - ARCV('$', 1); /* EOL */ - ARCV('$', 0); /* or EOS */ - NEXT(); - return; - break; - case '<': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - return; - break; - case '>': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case WBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case NWBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case LACON: /* lookahead constraint */ - pos = v->nextvalue; - NEXT(); - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - t = parse(v, ')', LACON, s, s2); - freesubre(v, t); /* internal structure irrelevant */ - assert(SEE(')') || ISERR()); - NEXT(); - n = newlacon(v, s, s2, pos); - NOERR(); - ARCV(LACON, n); - return; - break; - /* then errors, to get them out of the way */ - case '*': - case '+': - case '?': - case '{': - ERR(REG_BADRPT); - return; - break; - default: - ERR(REG_ASSERT); - return; - break; - /* then plain characters, and minor variants on that theme */ - case ')': /* unbalanced paren */ - if ((v->cflags & REG_ADVANCED) != REG_EXTENDED) 
- { - ERR(REG_EPAREN); - return; - } - /* legal in EREs due to specification botch */ - NOTE(REG_UPBOTCH); - /* fallthrough into case PLAIN */ - case PLAIN: - onechr(v, v->nextvalue, lp, rp); - okcolors(v->nfa, v->cm); - NOERR(); - NEXT(); - break; - case '[': - if (v->nextvalue == 1) - bracket(v, lp, rp); - else - cbracket(v, lp, rp); - assert(SEE(']') || ISERR()); - NEXT(); - break; - case '.': - rainbow(v->nfa, v->cm, PLAIN, - (v->cflags & REG_NLSTOP) ? v->nlcolor : COLORLESS, - lp, rp); - NEXT(); - break; - /* and finally the ugly stuff */ - case '(': /* value flags as capturing or non */ - cap = (type == LACON) ? 0 : v->nextvalue; - if (cap) - { - v->nsubexp++; - subno = v->nsubexp; - if ((size_t) subno >= v->nsubs) - moresubs(v, subno); - assert((size_t) subno < v->nsubs); - } - else - atomtype = PLAIN; /* something that's not '(' */ - NEXT(); - /* need new endpoints because tree will contain pointers */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - NOERR(); - atom = parse(v, ')', PLAIN, s, s2); - assert(SEE(')') || ISERR()); - NEXT(); - NOERR(); - if (cap) - { - v->subs[subno] = atom; - t = subre(v, '(', atom->flags | CAP, lp, rp); - NOERR(); - t->subno = subno; - t->left = atom; - atom = t; - } - /* postpone everything else pending possible {0} */ - break; - case BACKREF: /* the Feature From The Black Lagoon */ - INSIST(type != LACON, REG_ESUBREG); - INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); - INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); - NOERR(); - assert(v->nextvalue > 0); - atom = subre(v, 'b', BACKR, lp, rp); - subno = v->nextvalue; - atom->subno = subno; - EMPTYARC(lp, rp); /* temporarily, so there's something */ - NEXT(); - break; - } - - /* ...and an atom may be followed by a quantifier */ - switch (v->nexttype) - { - case '*': - m = 0; - n = INFINITY; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '+': - m = 1; - n = INFINITY; - qprefer = (v->nextvalue) ? 
LONGER : SHORTER; - NEXT(); - break; - case '?': - m = 0; - n = 1; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '{': - NEXT(); - m = scannum(v); - if (EAT(',')) - { - if (SEE(DIGIT)) - n = scannum(v); - else - n = INFINITY; - if (m > n) - { - ERR(REG_BADBR); + /* avoid making error situations worse */ + if (p->error != 0) return; - } - /* {m,n} exercises preference, even if it's {m,m} */ - qprefer = (v->nextvalue) ? LONGER : SHORTER; - } - else - { - n = m; - /* {m} passes operand's preference through */ - qprefer = 0; - } - if (!SEE('}')) - { /* catches errors too */ - ERR(REG_BADBR); - return; - } - NEXT(); - break; - default: /* no quantifier */ - m = n = 1; - qprefer = 0; - break; - } - /* annoying special case: {0} or {0,0} cancels everything */ - if (m == 0 && n == 0) - { - if (atom != NULL) - freesubre(v, atom); - if (atomtype == '(') - v->subs[subno] = NULL; - delsub(v->nfa, lp, rp); - EMPTYARC(lp, rp); - return; - } + /* deal with oversize operands ("can't happen", more or less) */ + assert(opnd < 1<flags)); - f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0); - if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) - { - if (!(m == 1 && n == 1)) - repeat(v, lp, rp, m, n); - if (atom != NULL) - freesubre(v, atom); - top->flags = f; - return; - } + /* deal with undersized strip */ + if (p->slen >= p->ssize) + enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ + assert(p->slen < p->ssize); - /* - * hard part: something messy That is, capturing parens, back - * reference, short/long clash, or an atom with substructure - * containing one of those. 
- */ - - /* now we'll need a subre for the contents even if they're boring */ - if (atom == NULL) - { - atom = subre(v, '=', 0, lp, rp); - NOERR(); - } - - /* - * prepare a general-purpose state skeleton - * - * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] / / - * [lp] ----> [s2] ----bypass--------------------- - * - * where bypass is an empty, and prefix is some repetitions of atom - */ - s = newstate(v->nfa); /* first, new endpoints for the atom */ - s2 = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); - NOERR(); - atom->begin = s; - atom->end = s2; - s = newstate(v->nfa); /* and spots for prefix and bypass */ - s2 = newstate(v->nfa); - NOERR(); - EMPTYARC(lp, s); - EMPTYARC(lp, s2); - NOERR(); - - /* break remaining subRE into x{...} and what follows */ - t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); - t->left = atom; - atomp = &t->left; - /* here we should recurse... but we must postpone that to the end */ - - /* split top into prefix and remaining */ - assert(top->op == '=' && top->left == NULL && top->right == NULL); - top->left = subre(v, '=', top->flags, top->begin, lp); - top->op = '.'; - top->right = t; - - /* if it's a backref, now is the time to replicate the subNFA */ - if (atomtype == BACKREF) - { - assert(atom->begin->nouts == 1); /* just the EMPTY */ - delsub(v->nfa, atom->begin, atom->end); - assert(v->subs[subno] != NULL); - /* and here's why the recursion got postponed: it must */ - /* wait until the skeleton is filled in, because it may */ - /* hit a backref that wants to copy the filled-in skeleton */ - dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, - atom->begin, atom->end); - NOERR(); - } - - /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */ - if (m == 0) - { - EMPTYARC(s2, atom->end); /* the bypass */ - assert(PREF(qprefer) != 0); - f = COMBINE(qprefer, atom->flags); - t = subre(v, '|', f, lp, atom->end); - NOERR(); - t->left = atom; - 
t->right = subre(v, '|', PREF(f), s2, atom->end); - NOERR(); - t->right->left = subre(v, '=', 0, s2, atom->end); - NOERR(); - *atomp = t; - atomp = &t->left; - m = 1; - } - - /* deal with the rest of the quantifier */ - if (atomtype == BACKREF) - { - /* special case: backrefs have internal quantifiers */ - EMPTYARC(s, atom->begin); /* empty prefix */ - /* just stuff everything into atom */ - repeat(v, atom->begin, atom->end, m, n); - atom->min = (short) m; - atom->max = (short) n; - atom->flags |= COMBINE(qprefer, atom->flags); - } - else if (m == 1 && n == 1) - { - /* no/vacuous quantifier: done */ - EMPTYARC(s, atom->begin); /* empty prefix */ - } - else - { - /* turn x{m,n} into x{m-1,n-1}x, with capturing */ - /* parens in only second x */ - dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); - assert(m >= 1 && m != INFINITY && n >= 1); - repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1); - f = COMBINE(qprefer, atom->flags); - t = subre(v, '.', f, s, atom->end); /* prefix and atom */ - NOERR(); - t->left = subre(v, '=', PREF(f), s, atom->begin); - NOERR(); - t->right = atom; - *atomp = t; - } - - /* and finally, look after that postponed recursion */ - t = top->right; - if (!(SEE('|') || SEE(stopper) || SEE(EOS))) - t->right = parsebranch(v, stopper, type, atom->end, rp, 1); - else - { - EMPTYARC(atom->end, rp); - t->right = subre(v, '=', 0, atom->end, rp); - } - assert(SEE('|') || SEE(stopper) || SEE(EOS)); - t->flags |= COMBINE(t->flags, t->right->flags); - top->flags |= COMBINE(top->flags, t->flags); + /* finally, it's all reduced to the easy case */ + p->strip[p->slen++] = SOP(op, opnd); } /* - * nonword - generate arcs for non-word-character ahead or behind + - doinsert - insert a sop into the strip + == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); */ static void -nonword(struct vars * v, - int dir, /* AHEAD or BEHIND */ - struct state * lp, - struct state * rp) +doinsert(p, op, opnd, pos) +register struct 
parse *p; +sop op; +size_t opnd; +sopno pos; { - int anchor = (dir == AHEAD) ? '$' : '^'; + register sopno sn; + register sop s; + register int i; - assert(dir == AHEAD || dir == BEHIND); - newarc(v->nfa, anchor, 1, lp, rp); - newarc(v->nfa, anchor, 0, lp, rp); - colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp); - /* (no need for special attention to \n) */ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + sn = HERE(); + EMIT(op, opnd); /* do checks, ensure space */ + assert(HERE() == sn+1); + s = p->strip[sn]; + + /* adjust paren pointers */ + assert(pos > 0); + for (i = 1; i < NPAREN; i++) { + if (p->pbegin[i] >= pos) { + p->pbegin[i]++; + } + if (p->pend[i] >= pos) { + p->pend[i]++; + } + } + + memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], + (HERE()-pos-1)*sizeof(sop)); + p->strip[pos] = s; } /* - * word - generate arcs for word character ahead or behind + - dofwd - complete a forward reference + == static void dofwd(register struct parse *p, sopno pos, sop value); */ static void -word(struct vars * v, - int dir, /* AHEAD or BEHIND */ - struct state * lp, - struct state * rp) +dofwd(p, pos, value) +register struct parse *p; +register sopno pos; +sop value; { - assert(dir == AHEAD || dir == BEHIND); - cloneouts(v->nfa, v->wordchrs, lp, rp, dir); - /* (no need for special attention to \n) */ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + assert(value < 1<strip[pos] = OP(p->strip[pos]) | value; } /* - * scannum - scan a number + - enlarge - enlarge the strip + == static void enlarge(register struct parse *p, sopno size); */ -static int /* value, <= DUPMAX */ -scannum(struct vars * v) +static void +enlarge(p, size) +register struct parse *p; +register sopno size; { - int n = 0; + register sop *sp; - while (SEE(DIGIT) && n < DUPMAX) - { - n = n * 10 + v->nextvalue; - NEXT(); + if (p->ssize >= size) + return; + + sp = (sop *)realloc(p->strip, size*sizeof(sop)); + if (sp == NULL) { + 
SETERROR(REG_ESPACE); + return; } - if (SEE(DIGIT) || n > DUPMAX) - { - ERR(REG_BADBR); - return 0; - } - return n; + p->strip = sp; + p->ssize = size; } /* - * repeat - replicate subNFA for quantifiers + - stripsnug - compact the strip + == static void stripsnug(register struct parse *p, register struct re_guts *g); + */ +static void +stripsnug(p, g) +register struct parse *p; +register struct re_guts *g; +{ + g->nstates = p->slen; + g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); + if (g->strip == NULL) { + SETERROR(REG_ESPACE); + g->strip = p->strip; + } +} + +/* + - findmust - fill in must and mlen with longest mandatory literal string + == static void findmust(register struct parse *p, register struct re_guts *g); * - * The duplication sequences used here are chosen carefully so that any - * pointers starting out pointing into the subexpression end up pointing into - * the last occurrence. (Note that it may not be strung between the same - * left and right end states, however!) This used to be important for the - * subRE tree, although the important bits are now handled by the in-line - * code in parse(), and when this is called, it doesn't matter any more. - */ -static void -repeat(struct vars * v, - struct state * lp, - struct state * rp, - int m, - int n) -{ -#define SOME 2 -#define INF 3 -#define PAIR(x, y) ((x)*4 + (y)) -#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? 
SOME : (x)) ) - const int rm = REDUCE(m); - const int rn = REDUCE(n); - struct state *s; - struct state *s2; - - switch (PAIR(rm, rn)) - { - case PAIR(0, 0): /* empty string */ - delsub(v->nfa, lp, rp); - EMPTYARC(lp, rp); - break; - case PAIR(0, 1): /* do as x| */ - EMPTYARC(lp, rp); - break; - case PAIR(0, SOME): /* do as x{1,n}| */ - repeat(v, lp, rp, 1, n); - NOERR(); - EMPTYARC(lp, rp); - break; - case PAIR(0, INF): /* loop x around */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s); - EMPTYARC(lp, s); - EMPTYARC(s, rp); - break; - case PAIR(1, 1): /* no action required */ - break; - case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, 1, n - 1); - NOERR(); - EMPTYARC(lp, s); - break; - case PAIR(1, INF): /* add loopback arc */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - EMPTYARC(s2, s); - break; - case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m - 1, n - 1); - break; - case PAIR(SOME, INF): /* do as x{m-1,}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m - 1, n); - break; - default: - ERR(REG_ASSERT); - break; - } -} - -/* - * bracket - handle non-complemented bracket expression - * Also called from cbracket for complemented bracket expressions. 
- */ -static void -bracket(struct vars * v, - struct state * lp, - struct state * rp) -{ - assert(SEE('[')); - NEXT(); - while (!SEE(']') && !SEE(EOS)) - brackpart(v, lp, rp); - assert(SEE(']') || ISERR()); - okcolors(v->nfa, v->cm); -} - -/* - * cbracket - handle complemented bracket expression - * We do it by calling bracket() with dummy endpoints, and then complementing - * the result. The alternative would be to invoke rainbow(), and then delete - * arcs as the b.e. is seen... but that gets messy. - */ -static void -cbracket(struct vars * v, - struct state * lp, - struct state * rp) -{ - struct state *left = newstate(v->nfa); - struct state *right = newstate(v->nfa); - struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* MCCE-prototype arc */ - color co; - chr *p; - int i; - - NOERR(); - bracket(v, left, right); - if (v->cflags & REG_NLSTOP) - newarc(v->nfa, PLAIN, v->nlcolor, left, right); - NOERR(); - - assert(lp->nouts == 0); /* all outarcs will be ours */ - - /* easy part of complementing */ - colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); - NOERR(); - if (v->mcces == NULL) - { /* no MCCEs -- we're done */ - dropstate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); - return; - } - - /* but complementing gets messy in the presence of MCCEs... 
*/ - NOTE(REG_ULOCALE); - for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) - { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - ba = findarc(left, PLAIN, co); - if (ba == NULL) - { - assert(a != NULL); - freearc(v->nfa, a); - } - else - assert(a == NULL); - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - if (ba == NULL) - { /* easy case, need all of them */ - cloneouts(v->nfa, pa->to, s, rp, PLAIN); - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } - else - { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) - { - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, - s, rp); - } - for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) - if (findarc(ba->to, PLAIN, pa->co) == NULL) - newarc(v->nfa, PLAIN, pa->co, s, rp); - if (s->nouts == 0) /* limit of selectivity: none */ - dropstate(v->nfa, s); /* frees arc too */ - } - NOERR(); - } - - delsub(v->nfa, left, right); - assert(left->nouts == 0); - freestate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); -} - -/* - * brackpart - handle one item (or range) within a bracket expression - */ -static void -brackpart(struct vars * v, - struct state * lp, - struct state * rp) -{ - celt startc; - celt endc; - struct cvec *cv; - chr *startp; - chr *endp; - chr c[1]; - - /* parse something, get rid of special cases, take shortcuts */ - switch (v->nexttype) - { - case RANGE: /* a-b-c or other botch */ - ERR(REG_ERANGE); - return; - break; - case PLAIN: - c[0] = v->nextvalue; - NEXT(); - /* shortcut for ordinary chr (not range, not MCCE leader) */ - if (!SEE(RANGE) && !ISCELEADER(v, c[0])) - { - onechr(v, c[0], lp, rp); - return; - } - startc = element(v, c, c + 1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = 
scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - break; - case ECLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - cv = eclass(v, startc, (v->cflags & REG_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - break; - case CCLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECTYPE); - NOERR(); - cv = cclass(v, startp, endp, (v->cflags & REG_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - break; - default: - ERR(REG_ASSERT); - return; - break; - } - - if (SEE(RANGE)) - { - NEXT(); - switch (v->nexttype) - { - case PLAIN: - case RANGE: - c[0] = v->nextvalue; - NEXT(); - endc = element(v, c, c + 1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - endc = element(v, startp, endp); - NOERR(); - break; - default: - ERR(REG_ERANGE); - return; - break; - } - } - else - endc = startc; - - /* - * Ranges are unportable. Actually, standard C does guarantee that - * digits are contiguous, but making that an exception is just too - * complicated. - */ - if (startc != endc) - NOTE(REG_UUNPORT); - cv = range(v, startc, endc, (v->cflags & REG_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); -} - -/* - * scanplain - scan PLAIN contents of [. etc. + * This algorithm could do fancy things like analyzing the operands of | + * for common subsequences. Someday. This code is simple and finds most + * of the interesting cases. * - * Certain bits of trickery in lex.c know that this code does not try - * to look past the final bracket of the [. etc. 
- */ -static chr * /* just after end of sequence */ -scanplain(struct vars * v) -{ - chr *endp; - - assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); - NEXT(); - - endp = v->now; - while (SEE(PLAIN)) - { - endp = v->now; - NEXT(); - } - - assert(SEE(END) || ISERR()); - NEXT(); - - return endp; -} - -/* - * leaders - process a cvec of collating elements to also include leaders - * Also gives all characters involved their own colors, which is almost - * certainly necessary, and sets up little disconnected subNFA. + * Note that must and mlen got initialized during setup. */ static void -leaders(struct vars * v, - struct cvec * cv) +findmust(p, g) +struct parse *p; +register struct re_guts *g; { - int mcce; - chr *p; - chr leader; - struct state *s; - struct arc *a; + register sop *scan; + sop *start; + register sop *newstart; + register sopno newlen; + register sop s; + register char *cp; + register sopno i; - v->mccepbegin = newstate(v->nfa); - v->mccepend = newstate(v->nfa); - NOERR(); - - for (mcce = 0; mcce < cv->nmcces; mcce++) - { - p = cv->mcces[mcce]; - leader = *p; - if (!haschr(cv, leader)) - { - addchr(cv, leader); - s = newstate(v->nfa); - newarc(v->nfa, PLAIN, subcolor(v->cm, leader), - v->mccepbegin, s); - okcolors(v->nfa, v->cm); - } - else - { - a = findarc(v->mccepbegin, PLAIN, - GETCOLOR(v->cm, leader)); - assert(a != NULL); - s = a->to; - assert(s != v->mccepend); - } - p++; - assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for - * now */ - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); - okcolors(v->nfa, v->cm); - } -} - -/* - * onechr - fill in arcs for a plain character, and possible case complements - * This is mostly a shortcut for efficient handling of the common case. - */ -static void -onechr(struct vars * v, - chr c, - struct state * lp, - struct state * rp) -{ - if (!(v->cflags & REG_ICASE)) - { - newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); - return; - } - - /* rats, need general case anyway... 
*/ - dovec(v, allcases(v, c), lp, rp); -} - -/* - * dovec - fill in arcs for each element of a cvec - * This one has to handle the messy cases, like MCCEs and MCCE leaders. - */ -static void -dovec(struct vars * v, - struct cvec * cv, - struct state * lp, - struct state * rp) -{ - chr ch, - from, - to; - celt ce; - chr *p; - int i; - color co; - struct cvec *leads; - struct arc *a; - struct arc *pa; /* arc in prototype */ - struct state *s; - struct state *ps; /* state in prototype */ - - /* need a place to store leaders, if any */ - if (nmcces(v) > 0) - { - assert(v->mcces != NULL); - if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) - { - if (v->cv2 != NULL) - free(v->cv2); - v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); - NOERR(); - leads = v->cv2; - } - else - leads = clearcvec(v->cv2); - } - else - leads = NULL; - - /* first, get the ordinary characters out of the way */ - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - { - ch = *p; - if (!ISCELEADER(v, ch)) - newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); - else - { - assert(singleton(v->cm, ch)); - assert(leads != NULL); - if (!haschr(leads, ch)) - addchr(leads, ch); - } - } - - /* and the ranges */ - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) - { - from = *p; - to = *(p + 1); - while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) - { - if (from < ce) - subrange(v, from, ce - 1, lp, rp); - assert(singleton(v->cm, ce)); - assert(leads != NULL); - if (!haschr(leads, ce)) - addchr(leads, ce); - from = ce + 1; - } - if (from <= to) - subrange(v, from, to, lp, rp); - } - - if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) + /* avoid making error situations worse */ + if (p->error != 0) return; - /* deal with the MCCE leaders */ - NOTE(REG_ULOCALE); - for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) - { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else - { - s = newstate(v->nfa); - NOERR(); - 
newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); + /* find the longest OCHAR sequence in strip */ + newlen = 0; + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OCHAR: /* sequence member */ + if (newlen == 0) /* new sequence */ + newstart = scan - 1; + newlen++; + break; + case OPLUS_: /* things that don't break one */ + case OLPAREN: + case ORPAREN: + break; + case OQUEST_: /* things that must be skipped */ + case OCH_: + scan--; + do { + scan += OPND(s); + s = *scan; + /* assert() interferes w debug printouts */ + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) { + g->iflags |= BAD; + return; + } + } while (OP(s) != O_QUEST && OP(s) != O_CH); + /* fallthrough */ + default: /* things that break a sequence */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + } + newlen = 0; + break; } - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - ps = pa->to; - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); - NOERR(); - } + } while (OP(s) != OEND); - /* and the MCCEs */ - for (i = 0; i < cv->nmcces; i++) - { - p = cv->mcces[i]; - assert(singleton(v->cm, *p)); - if (!singleton(v->cm, *p)) - { - ERR(REG_ASSERT); - return; - } - ch = *p++; - co = GETCOLOR(v->cm, ch); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else - { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - assert(*p != 0); /* at least two chars */ - assert(singleton(v->cm, *p)); - ch = *p++; - co = GETCOLOR(v->cm, ch); - assert(*p == 0); /* and only two, for now */ - newarc(v->nfa, PLAIN, co, s, rp); - NOERR(); - } -} - -/* - * nextleader - find next MCCE leader within range - */ -static celt /* NOCELT means none */ -nextleader(struct vars * v, - chr from, - chr to) -{ - int i; - chr *p; - chr ch; - celt it = NOCELT; - - if (v->mcces == NULL) - return it; - - for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) 
- { - ch = *p; - if (from <= ch && ch <= to) - if (it == NOCELT || ch < it) - it = ch; - } - return it; -} - -/* - * wordchrs - set up word-chr list for word-boundary stuff, if needed - * - * The list is kept as a bunch of arcs between two dummy states; it's - * disposed of by the unreachable-states sweep in NFA optimization. - * Does NEXT(). Must not be called from any unusual lexical context. - * This should be reconciled with the \w etc. handling in lex.c, and - * should be cleaned up to reduce dependencies on input scanning. - */ -static void -wordchrs(struct vars * v) -{ - struct state *left; - struct state *right; - - if (v->wordchrs != NULL) - { - NEXT(); /* for consistency */ - return; - } - - left = newstate(v->nfa); - right = newstate(v->nfa); - NOERR(); - /* fine point: implemented with [::], and lexer will set REG_ULOCALE */ - lexword(v); - NEXT(); - assert(v->savenow != NULL && SEE('[')); - bracket(v, left, right); - assert((v->savenow != NULL && SEE(']')) || ISERR()); - NEXT(); - NOERR(); - v->wordchrs = left; -} - -/* - * subre - allocate a subre - */ -static struct subre * -subre(struct vars * v, - int op, - int flags, - struct state * begin, - struct state * end) -{ - struct subre *ret; - - ret = v->treefree; - if (ret != NULL) - v->treefree = ret->left; - else - { - ret = (struct subre *) MALLOC(sizeof(struct subre)); - if (ret == NULL) - { - ERR(REG_ESPACE); - return NULL; - } - ret->chain = v->treechain; - v->treechain = ret; - } - - assert(strchr("|.b(=", op) != NULL); - - ret->op = op; - ret->flags = flags; - ret->retry = 0; - ret->subno = 0; - ret->min = ret->max = 1; - ret->left = NULL; - ret->right = NULL; - ret->begin = begin; - ret->end = end; - ZAPCNFA(ret->cnfa); - - return ret; -} - -/* - * freesubre - free a subRE subtree - */ -static void -freesubre(struct vars * v, /* might be NULL */ - struct subre * sr) -{ - if (sr == NULL) + if (g->mlen == 0) /* there isn't one */ return; - if (sr->left != NULL) - freesubre(v, sr->left); - if 
(sr->right != NULL) - freesubre(v, sr->right); - - freesrnode(v, sr); -} - -/* - * freesrnode - free one node in a subRE subtree - */ -static void -freesrnode(struct vars * v, /* might be NULL */ - struct subre * sr) -{ - if (sr == NULL) + /* turn it into a character string */ + g->must = malloc((size_t)g->mlen + 1); + if (g->must == NULL) { /* argh; just forget it */ + g->mlen = 0; return; - - if (!NULLCNFA(sr->cnfa)) - freecnfa(&sr->cnfa); - sr->flags = 0; - - if (v != NULL) - { - sr->left = v->treefree; - v->treefree = sr; } - else - FREE(sr); + cp = g->must; + scan = start; + for (i = g->mlen; i > 0; i--) { + while (OP(s = *scan++) != OCHAR) + continue; + assert(cp < g->must + g->mlen); + *cp++ = (char)OPND(s); + } + assert(cp == g->must + g->mlen); + *cp++ = '\0'; /* just on general principles */ } /* - * optst - optimize a subRE subtree + - pluscount - count + nesting + == static sopno pluscount(register struct parse *p, register struct re_guts *g); */ -static void -optst(struct vars * v, - struct subre * t) +static sopno /* nesting depth */ +pluscount(p, g) +struct parse *p; +register struct re_guts *g; { - if (t == NULL) - return; + register sop *scan; + register sop s; + register sopno plusnest = 0; + register sopno maxnest = 0; - /* recurse through children */ - if (t->left != NULL) - optst(v, t->left); - if (t->right != NULL) - optst(v, t->right); -} + if (p->error != 0) + return(0); /* there may not be an OEND */ -/* - * numst - number tree nodes (assigning retry indexes) - */ -static int /* next number */ -numst(struct subre * t, - int start) /* starting point for subtree numbers */ -{ - int i; - - assert(t != NULL); - - i = start; - t->retry = (short) i++; - if (t->left != NULL) - i = numst(t->left, i); - if (t->right != NULL) - i = numst(t->right, i); - return i; -} - -/* - * markst - mark tree nodes as INUSE - */ -static void -markst(struct subre * t) -{ - assert(t != NULL); - - t->flags |= INUSE; - if (t->left != NULL) - markst(t->left); - if 
(t->right != NULL) - markst(t->right); -} - -/* - * cleanst - free any tree nodes not marked INUSE - */ -static void -cleanst(struct vars * v) -{ - struct subre *t; - struct subre *next; - - for (t = v->treechain; t != NULL; t = next) - { - next = t->chain; - if (!(t->flags & INUSE)) - FREE(t); + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OPLUS_: + plusnest++; + break; + case O_PLUS: + if (plusnest > maxnest) + maxnest = plusnest; + plusnest--; + break; } - v->treechain = NULL; - v->treefree = NULL; /* just on general principles */ + } while (OP(s) != OEND); + if (plusnest != 0) + g->iflags |= BAD; + return(maxnest); } - -/* - * nfatree - turn a subRE subtree into a tree of compacted NFAs - */ -static long /* optimize results from top node */ -nfatree(struct vars * v, - struct subre * t, - FILE *f) /* for debug output */ -{ - assert(t != NULL && t->begin != NULL); - - if (t->left != NULL) - (DISCARD) nfatree(v, t->left, f); - if (t->right != NULL) - (DISCARD) nfatree(v, t->right, f); - - return nfanode(v, t, f); -} - -/* - * nfanode - do one NFA for nfatree - */ -static long /* optimize results */ -nfanode(struct vars * v, - struct subre * t, - FILE *f) /* for debug output */ -{ - struct nfa *nfa; - long ret = 0; - - assert(t->begin != NULL); - -#ifdef REG_DEBUG - if (f != NULL) - { - char idbuf[50]; - - fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", - stid(t, idbuf, sizeof(idbuf))); - } -#endif - nfa = newnfa(v, v->cm, v->nfa); - NOERRZ(); - dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); - if (!ISERR()) - { - specialcolors(nfa); - ret = optimize(nfa, f); - } - if (!ISERR()) - compact(nfa, &t->cnfa); - - freenfa(nfa); - return ret; -} - -/* - * newlacon - allocate a lookahead-constraint subRE - */ -static int /* lacon number */ -newlacon(struct vars * v, - struct state * begin, - struct state * end, - int pos) -{ - int n; - struct subre *sub; - - if (v->nlacons == 0) - { - v->lacons = (struct subre *) MALLOC(2 * 
sizeof(struct subre)); - n = 1; /* skip 0th */ - v->nlacons = 2; - } - else - { - v->lacons = (struct subre *) REALLOC(v->lacons, - (v->nlacons + 1) * sizeof(struct subre)); - n = v->nlacons++; - } - if (v->lacons == NULL) - { - ERR(REG_ESPACE); - return 0; - } - sub = &v->lacons[n]; - sub->begin = begin; - sub->end = end; - sub->subno = pos; - ZAPCNFA(sub->cnfa); - return n; -} - -/* - * freelacons - free lookahead-constraint subRE vector - */ -static void -freelacons(struct subre * subs, - int n) -{ - struct subre *sub; - int i; - - assert(n > 0); - for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */ - if (!NULLCNFA(sub->cnfa)) - freecnfa(&sub->cnfa); - FREE(subs); -} - -/* - * rfree - free a whole RE (insides of regfree) - */ -static void -rfree(regex_t *re) -{ - struct guts *g; - - if (re == NULL || re->re_magic != REMAGIC) - return; - - re->re_magic = 0; /* invalidate RE */ - g = (struct guts *) re->re_guts; - re->re_guts = NULL; - re->re_fns = NULL; - g->magic = 0; - freecm(&g->cmap); - if (g->tree != NULL) - freesubre((struct vars *) NULL, g->tree); - if (g->lacons != NULL) - freelacons(g->lacons, g->nlacons); - if (!NULLCNFA(g->search)) - freecnfa(&g->search); - FREE(g); -} - -#ifdef REG_DEBUG - -/* - * dump - dump an RE in human-readable form - */ -static void -dump(regex_t *re, - FILE *f) -{ - struct guts *g; - int i; - - if (re->re_magic != REMAGIC) - fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic, - REMAGIC); - if (re->re_guts == NULL) - { - fprintf(f, "NULL guts!!!\n"); - return; - } - g = (struct guts *) re->re_guts; - if (g->magic != GUTSMAGIC) - fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, - GUTSMAGIC); - - fprintf(f, "\n\n\n========= DUMP ==========\n"); - fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", - re->re_nsub, re->re_info, re->re_csize, g->ntree); - - dumpcolors(&g->cmap, f); - if (!NULLCNFA(g->search)) - { - printf("\nsearch:\n"); - dumpcnfa(&g->search, f); - } - for (i = 1; i < 
g->nlacons; i++) - { - fprintf(f, "\nla%d (%s):\n", i, - (g->lacons[i].subno) ? "positive" : "negative"); - dumpcnfa(&g->lacons[i].cnfa, f); - } - fprintf(f, "\n"); - dumpst(g->tree, f, 0); -} - -/* - * dumpst - dump a subRE tree - */ -static void -dumpst(struct subre * t, - FILE *f, - int nfapresent) /* is the original NFA still around? */ -{ - if (t == NULL) - fprintf(f, "null tree\n"); - else - stdump(t, f, nfapresent); - fflush(f); -} - -/* - * stdump - recursive guts of dumpst - */ -static void -stdump(struct subre * t, - FILE *f, - int nfapresent) /* is the original NFA still around? */ -{ - char idbuf[50]; - - fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); - if (t->flags & LONGER) - fprintf(f, " longest"); - if (t->flags & SHORTER) - fprintf(f, " shortest"); - if (t->flags & MIXED) - fprintf(f, " hasmixed"); - if (t->flags & CAP) - fprintf(f, " hascapture"); - if (t->flags & BACKR) - fprintf(f, " hasbackref"); - if (!(t->flags & INUSE)) - fprintf(f, " UNUSED"); - if (t->subno != 0) - fprintf(f, " (#%d)", t->subno); - if (t->min != 1 || t->max != 1) - { - fprintf(f, " {%d,", t->min); - if (t->max != INFINITY) - fprintf(f, "%d", t->max); - fprintf(f, "}"); - } - if (nfapresent) - fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no); - if (t->left != NULL) - fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); - if (t->right != NULL) - fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); - if (!NULLCNFA(t->cnfa)) - { - fprintf(f, "\n"); - dumpcnfa(&t->cnfa, f); - fprintf(f, "\n"); - } - if (t->left != NULL) - stdump(t->left, f, nfapresent); - if (t->right != NULL) - stdump(t->right, f, nfapresent); -} - -/* - * stid - identify a subtree node for dumping - */ -static char * /* points to buf or constant string */ -stid(struct subre * t, - char *buf, - size_t bufsize) -{ - /* big enough for hex int or decimal t->retry? 
*/ - if (bufsize < sizeof(int) * 2 + 3 || bufsize < sizeof(t->retry) * 3 + 1) - return "unable"; - if (t->retry != 0) - sprintf(buf, "%d", t->retry); - else - sprintf(buf, "0x%x", (int) t); /* may lose bits, that's okay */ - return buf; -} -#endif /* REG_DEBUG */ - - -#include "regc_lex.c" -#include "regc_color.c" -#include "regc_nfa.c" -#include "regc_cvec.c" -#include "regc_locale.c" diff --git a/src/regex/regcustom.h b/src/regex/regcustom.h deleted file mode 100644 index 15ea15f495..0000000000 --- a/src/regex/regcustom.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ - */ - -/* headers if any */ -#include -#include -#include -#include -#include -#ifndef wxCHECK_GCC_VERSION -#define wxCHECK_GCC_VERSION( major, minor ) \ - ( defined(__GNUC__) && defined(__GNUC_MINOR__) \ - && ( ( __GNUC__ > (major) ) \ - || ( __GNUC__ == (major) && __GNUC_MINOR__ >= (minor) ) ) ) -#endif - -#ifdef wxUSE_NEW_REGEX - - #if !wxUSE_UNICODE - # define wx_wchar char - #else // Unicode - #if (defined(__GNUC__) && !wxCHECK_GCC_VERSION(2, 96)) - # define wx_wchar __WCHAR_TYPE__ - #else // __WCHAR_TYPE__ and gcc < 2.96 - // standard case - # define wx_wchar wchar_t - #endif // __WCHAR_TYPE__ - #endif // ASCII/Unicode - -#else - - #define wx_wchar char - -#endif - - -/* overrides for regguts.h definitions, if any */ -#define FUNCPTR(name, args) (*name) args -#define MALLOC(n) malloc(n) -#define FREE(p) free(VS(p)) -#define REALLOC(p,n) realloc(VS(p),n) - -/* internal character type and related */ -typedef wx_wchar chr; /* the type itself */ -typedef unsigned uchr; /* unsigned type that will hold a chr */ -typedef int celt; /* type to hold chr, MCCE number, or - * NOCELT */ - -#define NOCELT (-1) /* celt value which is not valid chr or - * MCCE */ -#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr - * literal */ -#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ -#define CHRBITS 32 /* bits in a chr; must not use sizeof */ -#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ -#define 
CHR_MAX 0xfffffffe /* CHR_MAX-CHR_MIN+1 should fit in uchr */ - -/* functions operating on chr */ -#define iscalnum(x) wx_isalnum(x) -#define iscalpha(x) wx_isalpha(x) -#define iscdigit(x) wx_isdigit(x) -#define iscspace(x) wx_isspace(x) - -/* and pick up the standard header */ -#include "regex.h" diff --git a/src/regex/rege_dfa.c b/src/regex/rege_dfa.c deleted file mode 100644 index 5347b90d73..0000000000 --- a/src/regex/rege_dfa.c +++ /dev/null @@ -1,699 +0,0 @@ -/* - * DFA routines - * This file is #included by regexec.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Header$ - * - */ - -/* - * longest - longest-preferred matching engine - */ -static chr * /* endpoint, or NULL */ -longest(struct vars * v, /* used only for debug and exec flags */ - struct dfa * d, - chr *start, /* where the match should start */ - chr *stop, /* match must end at or before here */ - int *hitstopp) /* record whether hit v->stop, if non-NULL */ -{ - chr *cp; - chr *realstop = (stop == v->stop) ? stop : stop + 1; - color co; - struct sset *css; - struct sset *ss; - chr *post; - int i; - struct colormap *cm = d->cm; - - /* initialize */ - css = initialize(v, d, start); - cp = start; - if (hitstopp != NULL) - *hitstopp = 0; - - /* startup */ - FDEBUG(("+++ startup +++\n")); - if (cp == v->start) - { - co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 
0 : 1]; - FDEBUG(("color %ld\n", (long) co)); - } - else - { - co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); - } - css = miss(v, d, css, co, cp, start); - if (css == NULL) - return NULL; - css->lastseen = cp; - - /* main loop */ - if (v->eflags & REG_FTRACE) - while (cp < realstop) - { - FDEBUG(("+++ at c%d +++\n", css - d->ssets)); - co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); - ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - } - else - while (cp < realstop) - { - co = GETCOLOR(cm, *cp); - ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - } - - /* shutdown */ - FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); - if (cp == v->stop && stop == v->stop) - { - if (hitstopp != NULL) - *hitstopp = 1; - co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long) co)); - ss = miss(v, d, css, co, cp, start); - /* special case: match ended at eol? 
*/ - if (ss != NULL && (ss->flags & POSTSTATE)) - return cp; - else if (ss != NULL) - ss->lastseen = cp; /* to be tidy */ - } - - /* find last match, if any */ - post = d->lastpost; - for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags & POSTSTATE) && post != ss->lastseen && - (post == NULL || post < ss->lastseen)) - post = ss->lastseen; - if (post != NULL) /* found one */ - return post - 1; - - return NULL; -} - -/* - * shortest - shortest-preferred matching engine - */ -static chr * /* endpoint, or NULL */ -shortest(struct vars * v, - struct dfa * d, - chr *start, /* where the match should start */ - chr *min, /* match must end at or after here */ - chr *max, /* match must end at or before here */ - chr **coldp, /* store coldstart pointer here, if - * nonNULL */ - int *hitstopp) /* record whether hit v->stop, if non-NULL */ -{ - chr *cp; - chr *realmin = (min == v->stop) ? min : min + 1; - chr *realmax = (max == v->stop) ? max : max + 1; - color co; - struct sset *css; - struct sset *ss; - struct colormap *cm = d->cm; - - /* initialize */ - css = initialize(v, d, start); - cp = start; - if (hitstopp != NULL) - *hitstopp = 0; - - /* startup */ - FDEBUG(("--- startup ---\n")); - if (cp == v->start) - { - co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 
0 : 1]; - FDEBUG(("color %ld\n", (long) co)); - } - else - { - co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); - } - css = miss(v, d, css, co, cp, start); - if (css == NULL) - return NULL; - css->lastseen = cp; - ss = css; - - /* main loop */ - if (v->eflags & REG_FTRACE) - while (cp < realmax) - { - FDEBUG(("--- at c%d ---\n", css - d->ssets)); - co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); - ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - if ((ss->flags & POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ - } - else - while (cp < realmax) - { - co = GETCOLOR(cm, *cp); - ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - if ((ss->flags & POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ - } - - if (ss == NULL) - return NULL; - - if (coldp != NULL) /* report last no-progress state set, if - * any */ - *coldp = lastcold(v, d); - - if ((ss->flags & POSTSTATE) && cp > min) - { - assert(cp >= realmin); - cp--; - } - else if (cp == v->stop && max == v->stop) - { - co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 
0 : 1]; - FDEBUG(("color %ld\n", (long) co)); - ss = miss(v, d, css, co, cp, start); - /* match might have ended at eol */ - if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL) - *hitstopp = 1; - } - - if (ss == NULL || !(ss->flags & POSTSTATE)) - return NULL; - - return cp; -} - -/* - * lastcold - determine last point at which no progress had been made - */ -static chr * /* endpoint, or NULL */ -lastcold(struct vars * v, - struct dfa * d) -{ - struct sset *ss; - chr *nopr; - int i; - - nopr = d->lastnopr; - if (nopr == NULL) - nopr = v->start; - for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen) - nopr = ss->lastseen; - return nopr; -} - -/* - * newdfa - set up a fresh DFA - */ -static struct dfa * -newdfa(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm, - struct smalldfa * small) /* preallocated space, may be NULL */ -{ - struct dfa *d; - size_t nss = cnfa->nstates * 2; - int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; - struct smalldfa *smallwas = small; - - assert(cnfa != NULL && cnfa->nstates != 0); - - if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) - { - assert(wordsper == 1); - if (small == NULL) - { - small = (struct smalldfa *) MALLOC( - sizeof(struct smalldfa)); - if (small == NULL) - { - ERR(REG_ESPACE); - return NULL; - } - } - d = &small->dfa; - d->ssets = small->ssets; - d->statesarea = small->statesarea; - d->work = &d->statesarea[nss]; - d->outsarea = small->outsarea; - d->incarea = small->incarea; - d->cptsmalloced = 0; - d->mallocarea = (smallwas == NULL) ? 
(char *) small : NULL; - } - else - { - d = (struct dfa *) MALLOC(sizeof(struct dfa)); - if (d == NULL) - { - ERR(REG_ESPACE); - return NULL; - } - d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset)); - d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper * - sizeof(unsigned)); - d->work = &d->statesarea[nss * wordsper]; - d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors * - sizeof(struct sset *)); - d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors * - sizeof(struct arcp)); - d->cptsmalloced = 1; - d->mallocarea = (char *) d; - if (d->ssets == NULL || d->statesarea == NULL || - d->outsarea == NULL || d->incarea == NULL) - { - freedfa(d); - ERR(REG_ESPACE); - return NULL; - } - } - - d->nssets = (v->eflags & REG_SMALL) ? 7 : nss; - d->nssused = 0; - d->nstates = cnfa->nstates; - d->ncolors = cnfa->ncolors; - d->wordsper = wordsper; - d->cnfa = cnfa; - d->cm = cm; - d->lastpost = NULL; - d->lastnopr = NULL; - d->search = d->ssets; - - /* initialization of sset fields is done as needed */ - - return d; -} - -/* - * freedfa - free a DFA - */ -static void -freedfa(struct dfa * d) -{ - if (d->cptsmalloced) - { - if (d->ssets != NULL) - FREE(d->ssets); - if (d->statesarea != NULL) - FREE(d->statesarea); - if (d->outsarea != NULL) - FREE(d->outsarea); - if (d->incarea != NULL) - FREE(d->incarea); - } - - if (d->mallocarea != NULL) - FREE(d->mallocarea); -} - -/* - * hash - construct a hash code for a bitvector - * - * There are probably better ways, but they're more expensive. - */ -static unsigned -hash(unsigned *uv, - int n) -{ - int i; - unsigned h; - - h = 0; - for (i = 0; i < n; i++) - h ^= uv[i]; - return h; -} - -/* - * initialize - hand-craft a cache entry for startup, otherwise get ready - */ -static struct sset * -initialize(struct vars * v, /* used only for debug flags */ - struct dfa * d, - chr *start) -{ - struct sset *ss; - int i; - - /* is previous one still there? 
*/ - if (d->nssused > 0 && (d->ssets[0].flags & STARTER)) - ss = &d->ssets[0]; - else - { /* no, must (re)build it */ - ss = getvacant(v, d, start, start); - for (i = 0; i < d->wordsper; i++) - ss->states[i] = 0; - BSET(ss->states, d->cnfa->pre); - ss->hash = HASH(ss->states, d->wordsper); - assert(d->cnfa->pre != d->cnfa->post); - ss->flags = STARTER | LOCKED | NOPROGRESS; - /* lastseen dealt with below */ - } - - for (i = 0; i < d->nssused; i++) - d->ssets[i].lastseen = NULL; - ss->lastseen = start; /* maybe untrue, but harmless */ - d->lastpost = NULL; - d->lastnopr = NULL; - return ss; -} - -/* - * miss - handle a cache miss - */ -static struct sset * /* NULL if goes to empty set */ -miss(struct vars * v, /* used only for debug flags */ - struct dfa * d, - struct sset * css, - pcolor co, - chr *cp, /* next chr */ - chr *start) /* where the attempt got started */ -{ - struct cnfa *cnfa = d->cnfa; - int i; - unsigned h; - struct carc *ca; - struct sset *p; - int ispost; - int noprogress; - int gotstate; - int dolacons; - int sawlacons; - - /* for convenience, we can be called even if it might not be a miss */ - if (css->outs[co] != NULL) - { - FDEBUG(("hit\n")); - return css->outs[co]; - } - FDEBUG(("miss\n")); - - /* first, what set of states would we end up in? */ - for (i = 0; i < d->wordsper; i++) - d->work[i] = 0; - ispost = 0; - noprogress = 1; - gotstate = 0; - for (i = 0; i < d->nstates; i++) - if (ISBSET(css->states, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++) - if (ca->co == co) - { - BSET(d->work, ca->to); - gotstate = 1; - if (ca->to == cnfa->post) - ispost = 1; - if (!cnfa->states[ca->to]->co) - noprogress = 0; - FDEBUG(("%d -> %d\n", i, ca->to)); - } - dolacons = (gotstate) ? 
(cnfa->flags & HASLACONS) : 0; - sawlacons = 0; - while (dolacons) - { /* transitive closure */ - dolacons = 0; - for (i = 0; i < d->nstates; i++) - if (ISBSET(d->work, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; - ca++) - { - if (ca->co <= cnfa->ncolors) - continue; /* NOTE CONTINUE */ - sawlacons = 1; - if (ISBSET(d->work, ca->to)) - continue; /* NOTE CONTINUE */ - if (!lacon(v, cnfa, cp, ca->co)) - continue; /* NOTE CONTINUE */ - BSET(d->work, ca->to); - dolacons = 1; - if (ca->to == cnfa->post) - ispost = 1; - if (!cnfa->states[ca->to]->co) - noprogress = 0; - FDEBUG(("%d :> %d\n", i, ca->to)); - } - } - if (!gotstate) - return NULL; - h = HASH(d->work, d->wordsper); - - /* next, is that in the cache? */ - for (p = d->ssets, i = d->nssused; i > 0; p++, i--) - if (HIT(h, d->work, p, d->wordsper)) - { - FDEBUG(("cached c%d\n", p - d->ssets)); - break; /* NOTE BREAK OUT */ - } - if (i == 0) - { /* nope, need a new cache entry */ - p = getvacant(v, d, cp, start); - assert(p != css); - for (i = 0; i < d->wordsper; i++) - p->states[i] = d->work[i]; - p->hash = h; - p->flags = (ispost) ? POSTSTATE : 0; - if (noprogress) - p->flags |= NOPROGRESS; - /* lastseen to be dealt with by caller */ - } - - if (!sawlacons) - { /* lookahead conds. always cache miss */ - FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); - css->outs[co] = p; - css->inchain[co] = p->ins; - p->ins.ss = css; - p->ins.co = (color) co; - } - return p; -} - -/* - * lacon - lookahead-constraint checker for miss() - */ -static int /* predicate: constraint satisfied? 
*/ -lacon(struct vars * v, - struct cnfa * pcnfa, /* parent cnfa */ - chr *cp, - pcolor co) /* "color" of the lookahead constraint */ -{ - int n; - struct subre *sub; - struct dfa *d; - struct smalldfa sd; - chr *end; - - n = co - pcnfa->ncolors; - assert(n < v->g->nlacons && v->g->lacons != NULL); - FDEBUG(("=== testing lacon %d\n", n)); - sub = &v->g->lacons[n]; - d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); - if (d == NULL) - { - ERR(REG_ESPACE); - return 0; - } - end = longest(v, d, cp, v->stop, (int *) NULL); - freedfa(d); - FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); - return (sub->subno) ? (end != NULL) : (end == NULL); -} - -/* - * getvacant - get a vacant state set - * This routine clears out the inarcs and outarcs, but does not otherwise - * clear the innards of the state set -- that's up to the caller. - */ -static struct sset * -getvacant(struct vars * v, /* used only for debug flags */ - struct dfa * d, - chr *cp, - chr *start) -{ - int i; - struct sset *ss; - struct sset *p; - struct arcp ap; - struct arcp lastap; - color co; - - ss = pickss(v, d, cp, start); - assert(!(ss->flags & LOCKED)); - - /* clear out its inarcs, including self-referential ones */ - ap = ss->ins; - while ((p = ap.ss) != NULL) - { - co = ap.co; - FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long) co)); - p->outs[co] = NULL; - ap = p->inchain[co]; - p->inchain[co].ss = NULL; /* paranoia */ - } - ss->ins.ss = NULL; - - /* take it off the inarc chains of the ssets reached by its outarcs */ - for (i = 0; i < d->ncolors; i++) - { - p = ss->outs[i]; - assert(p != ss); /* not self-referential */ - if (p == NULL) - continue; /* NOTE CONTINUE */ - FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); - if (p->ins.ss == ss && p->ins.co == i) - p->ins = ss->inchain[i]; - else - { - assert(p->ins.ss != NULL); - for (ap = p->ins; ap.ss != NULL && - !(ap.ss == ss && ap.co == i); - ap = ap.ss->inchain[ap.co]) - lastap = ap; - assert(ap.ss != NULL); - 
lastap.ss->inchain[lastap.co] = ss->inchain[i]; - } - ss->outs[i] = NULL; - ss->inchain[i].ss = NULL; - } - - /* if ss was a success state, may need to remember location */ - if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost && - (d->lastpost == NULL || d->lastpost < ss->lastseen)) - d->lastpost = ss->lastseen; - - /* likewise for a no-progress state */ - if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr && - (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) - d->lastnopr = ss->lastseen; - - return ss; -} - -/* - * pickss - pick the next stateset to be used - */ -static struct sset * -pickss(struct vars * v, /* used only for debug flags */ - struct dfa * d, - chr *cp, - chr *start) -{ - int i; - struct sset *ss; - struct sset *end; - chr *ancient; - - /* shortcut for cases where cache isn't full */ - if (d->nssused < d->nssets) - { - i = d->nssused; - d->nssused++; - ss = &d->ssets[i]; - FDEBUG(("new c%d\n", i)); - /* set up innards */ - ss->states = &d->statesarea[i * d->wordsper]; - ss->flags = 0; - ss->ins.ss = NULL; - ss->ins.co = WHITE; /* give it some value */ - ss->outs = &d->outsarea[i * d->ncolors]; - ss->inchain = &d->incarea[i * d->ncolors]; - for (i = 0; i < d->ncolors; i++) - { - ss->outs[i] = NULL; - ss->inchain[i].ss = NULL; - } - return ss; - } - - /* look for oldest, or old enough anyway */ - if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */ - ancient = cp - d->nssets * 2 / 3; - else - ancient = start; - for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) - if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags & LOCKED)) - { - d->search = ss + 1; - FDEBUG(("replacing c%d\n", ss - d->ssets)); - return ss; - } - for (ss = d->ssets, end = d->search; ss < end; ss++) - if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags & LOCKED)) - { - d->search = ss + 1; - FDEBUG(("replacing c%d\n", ss - d->ssets)); - return ss; - } - - /* nobody's old enough?!? 
-- something's really wrong */ - FDEBUG(("can't find victim to replace!\n")); - assert(NOTREACHED); - ERR(REG_ASSERT); - return d->ssets; -} diff --git a/src/regex/regerror.c b/src/regex/regerror.c index 4fd08556ba..b4c839adf5 100644 --- a/src/regex/regerror.c +++ b/src/regex/regerror.c @@ -1,126 +1,130 @@ +#if defined(__MWERKS__) && !defined(__MACH__) +typedef long off_t ; +#else +#include +#endif +#include +#include +#include +#include +#include +#include "regex.h" + +#include "utils.h" +#include "regerror.ih" + /* - * regerror - error-code expansion - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regerror.c,v 1.26 2003/08/04 00:43:21 momjian Exp $ - * + = #define REG_OKAY 0 + = #define REG_NOMATCH 1 + = #define REG_BADPAT 2 + = #define REG_ECOLLATE 3 + = #define REG_ECTYPE 4 + = #define REG_EESCAPE 5 + = #define REG_ESUBREG 6 + = #define REG_EBRACK 7 + = #define REG_EPAREN 8 + = #define REG_EBRACE 9 + = #define REG_BADBR 10 + = #define REG_ERANGE 11 + = #define REG_ESPACE 12 + = #define REG_BADRPT 13 + = #define REG_EMPTY 14 + = #define REG_ASSERT 15 + = #define REG_INVARG 16 + = #define REG_ATOI 255 // convert name to number (!) + = #define REG_ITOA 0400 // convert number to name (!) 
*/ - -#include "regguts.h" - -/* unknown-error explanation */ -static char unk[] = "*** unknown regex error code 0x%x ***"; - -/* struct to map among codes, code names, and explanations */ -static struct rerr -{ +static struct rerr { int code; char *name; char *explain; -} rerrs[] = - -{ - /* the actual table is built from regex.h */ -#include "regerrs.h" - { - -1, "", "oops" - }, /* explanation special-cased in code */ +} rerrs[] = { + { REG_OKAY, "REG_OKAY", "no errors detected" }, + { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, + { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, + { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, + { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, + { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, + { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, + { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, + { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, + { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, + { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, + { REG_ERANGE, "REG_ERANGE", "invalid character range" }, + { REG_ESPACE, "REG_ESPACE", "out of memory" }, + { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, + { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, + { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, + { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, + { -1, "", "*** unknown regexp error code ***" }, }; -size_t -regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */ - const regex_t *preg, /* associated regex_t (unused at present) */ - char *errbuf, /* result buffer (unless errbuf_size==0) */ - size_t errbuf_size) /* available space in errbuf, can be 0 */ - { return regerror(errcode, preg, errbuf, errbuf_size); } /* - * pg_regerror - the interface to error numbers + - regerror - the interface to error numbers + = extern size_t regerror(int, const regex_t 
*, char *, size_t); */ /* ARGSUSED */ -size_t /* actual space needed (including NUL) */ -wx_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */ - const regex_t *preg, /* associated regex_t (unused at present) */ - char *errbuf, /* result buffer (unless errbuf_size==0) */ - size_t errbuf_size) /* available space in errbuf, can be 0 */ +size_t +regerror(errcode, preg, errbuf, errbuf_size) +int errcode; +const regex_t *preg; +char *errbuf; +size_t errbuf_size; { - struct rerr *r; - char *msg; - char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */ - size_t len; - int icode; + register struct rerr *r; + register size_t len; + register int target = errcode &~ REG_ITOA; + register char *s; + char convbuf[50]; - switch (errcode) - { - case REG_ATOI: /* convert name to number */ + if (errcode == REG_ATOI) + s = regatoi(preg, convbuf); + else { for (r = rerrs; r->code >= 0; r++) - if (strcmp(r->name, errbuf) == 0) + if (r->code == target) break; - sprintf(convbuf, "%d", r->code); /* -1 for unknown */ - msg = convbuf; - break; - case REG_ITOA: /* convert number to name */ - icode = atoi(errbuf); /* not our problem if this fails */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == icode) - break; + + if (errcode&REG_ITOA) { if (r->code >= 0) - msg = r->name; + (void) strcpy(convbuf, r->name); else - { /* unknown; tell him the number */ - sprintf(convbuf, "REG_%u", (unsigned) icode); - msg = convbuf; - } - break; - default: /* a real, normal error code */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == errcode) - break; - if (r->code >= 0) - msg = r->explain; - else - { /* unknown; say so */ - sprintf(convbuf, unk, errcode); - msg = convbuf; - } - break; + sprintf(convbuf, "REG_0x%x", target); + assert(strlen(convbuf) < sizeof(convbuf)); + s = convbuf; + } else + s = r->explain; } - len = strlen(msg) + 1; /* space needed, including NUL */ - if (errbuf_size > 0) - { + len = strlen(s) + 1; + if (errbuf_size > 0) { if (errbuf_size > len) - strcpy(errbuf, 
msg); - else - { /* truncate to fit */ - memcpy(errbuf, msg, errbuf_size - 1); /*RN - was strncpy*/ - errbuf[errbuf_size - 1] = '\0'; + (void) strcpy(errbuf, s); + else { + (void) strncpy(errbuf, s, errbuf_size-1); + errbuf[errbuf_size-1] = '\0'; } } - return len; + return(len); +} + +/* + - regatoi - internal routine to implement REG_ATOI + == static char *regatoi(const regex_t *preg, char *localbuf); + */ +static char * +regatoi(preg, localbuf) +const regex_t *preg; +char *localbuf; +{ + register struct rerr *r; + + for (r = rerrs; r->code >= 0; r++) + if (strcmp(r->name, preg->re_endp) == 0) + break; + if (r->code < 0) + return("0"); + + sprintf(localbuf, "%d", r->code); + return(localbuf); } diff --git a/src/regex/regerrs.h b/src/regex/regerrs.h deleted file mode 100644 index f99dbf4f73..0000000000 --- a/src/regex/regerrs.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * $Id$ - */ - -{ - REG_OKAY, "REG_OKAY", "no errors detected" -}, - -{ - REG_NOMATCH, "REG_NOMATCH", "failed to match" -}, - -{ - REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" -}, - -{ - REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" -}, - -{ - REG_ECTYPE, "REG_ECTYPE", "invalid character class" -}, - -{ - REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" -}, - -{ - REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" -}, - -{ - REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" -}, - -{ - REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" -}, - -{ - REG_EBRACE, "REG_EBRACE", "braces {} not balanced" -}, - -{ - REG_BADBR, "REG_BADBR", "invalid repetition count(s)" -}, - -{ - REG_ERANGE, "REG_ERANGE", "invalid character range" -}, - -{ - REG_ESPACE, "REG_ESPACE", "out of memory" -}, - -{ - REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" -}, - -{ - REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" -}, - -{ - REG_INVARG, "REG_INVARG", "invalid argument to regex function" -}, - -{ - REG_MIXED, "REG_MIXED", "character widths of regex and string 
differ" -}, - -{ - REG_BADOPT, "REG_BADOPT", "invalid embedded option" -}, diff --git a/src/regex/regex.7 b/src/regex/regex.7 new file mode 100644 index 0000000000..0fa180269e --- /dev/null +++ b/src/regex/regex.7 @@ -0,0 +1,235 @@ +.TH REGEX 7 "25 Oct 1995" +.BY "Henry Spencer" +.SH NAME +regex \- POSIX 1003.2 regular expressions +.SH DESCRIPTION +Regular expressions (``RE''s), +as defined in POSIX 1003.2, come in two forms: +modern REs (roughly those of +.IR egrep ; +1003.2 calls these ``extended'' REs) +and obsolete REs (roughly those of +.IR ed ; +1003.2 ``basic'' REs). +Obsolete REs mostly exist for backward compatibility in some old programs; +they will be discussed at the end. +1003.2 leaves some aspects of RE syntax and semantics open; +`\(dg' marks decisions on these aspects that +may not be fully portable to other 1003.2 implementations. +.PP +A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR, +separated by `|'. +It matches anything that matches one of the branches. +.PP +A branch is one\(dg or more \fIpieces\fR, concatenated. +It matches a match for the first, followed by a match for the second, etc. +.PP +A piece is an \fIatom\fR possibly followed +by a single\(dg `*', `+', `?', or \fIbound\fR. +An atom followed by `*' matches a sequence of 0 or more matches of the atom. +An atom followed by `+' matches a sequence of 1 or more matches of the atom. +An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. +.PP +A \fIbound\fR is `{' followed by an unsigned decimal integer, +possibly followed by `,' +possibly followed by another unsigned decimal integer, +always followed by `}'. +The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive, +and if there are two of them, the first may not exceed the second. +An atom followed by a bound containing one integer \fIi\fR +and no comma matches +a sequence of exactly \fIi\fR matches of the atom. 
+An atom followed by a bound +containing one integer \fIi\fR and a comma matches +a sequence of \fIi\fR or more matches of the atom. +An atom followed by a bound +containing two integers \fIi\fR and \fIj\fR matches +a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. +.PP +An atom is a regular expression enclosed in `()' (matching a match for the +regular expression), +an empty set of `()' (matching the null string)\(dg, +a \fIbracket expression\fR (see below), `.' +(matching any single character), `^' (matching the null string at the +beginning of a line), `$' (matching the null string at the +end of a line), a `\e' followed by one of the characters +`^.[$()|*+?{\e' +(matching that character taken as an ordinary character), +a `\e' followed by any other character\(dg +(matching that character taken as an ordinary character, +as if the `\e' had not been present\(dg), +or a single character with no other significance (matching that character). +A `{' followed by a character other than a digit is an ordinary +character, not the beginning of a bound\(dg. +It is illegal to end an RE with `\e'. +.PP +A \fIbracket expression\fR is a list of characters enclosed in `[]'. +It normally matches any single character from the list (but see below). +If the list begins with `^', +it matches any single character +(but see below) \fInot\fR from the rest of the list. +If two characters in the list are separated by `\-', this is shorthand +for the full \fIrange\fR of characters between those two (inclusive) in the +collating sequence, +e.g. `[0\-9]' in ASCII matches any decimal digit. +It is illegal\(dg for two ranges to share an +endpoint, e.g. `a\-c\-e'. +Ranges are very collating-sequence-dependent, +and portable programs should avoid relying on them. +.PP +To include a literal `]' in the list, make it the first character +(following a possible `^'). +To include a literal `\-', make it the first or last character, +or the second endpoint of a range. 
+To use a literal `\-' as the first endpoint of a range, +enclose it in `[.' and `.]' to make it a collating element (see below). +With the exception of these and some combinations using `[' (see next +paragraphs), all other special characters, including `\e', lose their +special significance within a bracket expression. +.PP +Within a bracket expression, a collating element (a character, +a multi-character sequence that collates as if it were a single character, +or a collating-sequence name for either) +enclosed in `[.' and `.]' stands for the +sequence of characters of that collating element. +The sequence is a single element of the bracket expression's list. +A bracket expression containing a multi-character collating element +can thus match more than one character, +e.g. if the collating sequence includes a `ch' collating element, +then the RE `[[.ch.]]*c' matches the first five characters +of `chchcc'. +.PP +Within a bracket expression, a collating element enclosed in `[=' and +`=]' is an equivalence class, standing for the sequences of characters +of all collating elements equivalent to that one, including itself. +(If there are no other equivalent collating elements, +the treatment is as if the enclosing delimiters were `[.' and `.]'.) +For example, if o and \o'o^' are the members of an equivalence class, +then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. +An equivalence class may not\(dg be an endpoint +of a range. +.PP +Within a bracket expression, the name of a \fIcharacter class\fR enclosed +in `[:' and `:]' stands for the list of all characters belonging to that +class. +Standard character class names are: +.PP +.RS +.nf +.ta 3c 6c 9c +alnum digit punct +alpha graph space +blank lower upper +cntrl print xdigit +.fi +.RE +.PP +These stand for the character classes defined in +.IR ctype (3). +A locale may provide others. +A character class may not be used as an endpoint of a range. 
+.PP +There are two special cases\(dg of bracket expressions: +the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at +the beginning and end of a word respectively. +A word is defined as a sequence of +word characters +which is neither preceded nor followed by +word characters. +A word character is an +.I alnum +character (as defined by +.IR ctype (3)) +or an underscore. +This is an extension, +compatible with but not specified by POSIX 1003.2, +and should be used with +caution in software intended to be portable to other systems. +.PP +In the event that an RE could match more than one substring of a given +string, +the RE matches the one starting earliest in the string. +If the RE could match more than one substring starting at that point, +it matches the longest. +Subexpressions also match the longest possible substrings, subject to +the constraint that the whole match be as long as possible, +with subexpressions starting earlier in the RE taking priority over +ones starting later. +Note that higher-level subexpressions thus take priority over +their lower-level component subexpressions. +.PP +Match lengths are measured in characters, not collating elements. +A null string is considered longer than no match at all. +For example, +`bb*' matches the three middle characters of `abbbc', +`(wee|week)(knights|nights)' matches all ten characters of `weeknights', +when `(.*).*' is matched against `abc' the parenthesized subexpression +matches all three characters, and +when `(a*)*' is matched against `bc' both the whole RE and the parenthesized +subexpression match the null string. +.PP +If case-independent matching is specified, +the effect is much as if all case distinctions had vanished from the +alphabet. +When an alphabetic that exists in multiple cases appears as an +ordinary character outside a bracket expression, it is effectively +transformed into a bracket expression containing both cases, +e.g. `x' becomes `[xX]'. 
+When it appears inside a bracket expression, all case counterparts +of it are added to the bracket expression, so that (e.g.) `[x]' +becomes `[xX]' and `[^x]' becomes `[^xX]'. +.PP +No particular limit is imposed on the length of REs\(dg. +Programs intended to be portable should not employ REs longer +than 256 bytes, +as an implementation can refuse to accept such REs and remain +POSIX-compliant. +.PP +Obsolete (``basic'') regular expressions differ in several respects. +`|', `+', and `?' are ordinary characters and there is no equivalent +for their functionality. +The delimiters for bounds are `\e{' and `\e}', +with `{' and `}' by themselves ordinary characters. +The parentheses for nested subexpressions are `\e(' and `\e)', +with `(' and `)' by themselves ordinary characters. +`^' is an ordinary character except at the beginning of the +RE or\(dg the beginning of a parenthesized subexpression, +`$' is an ordinary character except at the end of the +RE or\(dg the end of a parenthesized subexpression, +and `*' is an ordinary character if it appears at the beginning of the +RE or the beginning of a parenthesized subexpression +(after a possible leading `^'). +Finally, there is one new type of atom, a \fIback reference\fR: +`\e' followed by a non-zero decimal digit \fId\fR +matches the same sequence of characters +matched by the \fId\fRth parenthesized subexpression +(numbering subexpressions by the positions of their opening parentheses, +left to right), +so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. +.SH SEE ALSO +regex(3) +.PP +POSIX 1003.2, section 2.8 (Regular Expression Notation). +.SH HISTORY +Written by Henry Spencer, based on the 1003.2 spec. +.SH BUGS +Having two kinds of REs is a botch. +.PP +The current 1003.2 spec says that `)' is an ordinary character in +the absence of an unmatched `('; +this was an unintentional result of a wording error, +and change is likely. +Avoid relying on it. 
+.PP +Back references are a dreadful botch, +posing major problems for efficient implementations. +They are also somewhat vaguely defined +(does +`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?). +Avoid using them. +.PP +1003.2's specification of case-independent matching is vague. +The ``one case implies all cases'' definition given above +is current consensus among implementors as to the right interpretation. +.PP +The syntax for word boundaries is incredibly ugly. diff --git a/src/regex/regex.h b/src/regex/regex.h index a1fcec2e1c..d094d072d5 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -1,202 +1,74 @@ #ifndef _REGEX_H_ #define _REGEX_H_ /* never again */ -/* - * regular expressions - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ - */ - -/* - * Add your own defines, if needed, here. - */ +/* ========= begin header generated by ./mkh ========= */ #ifdef __cplusplus extern "C" { #endif -#include -#include -#include - -#ifndef wxCHECK_GCC_VERSION -#define wxCHECK_GCC_VERSION( major, minor ) \ - ( defined(__GNUC__) && defined(__GNUC_MINOR__) \ - && ( ( __GNUC__ > (major) ) \ - || ( __GNUC__ == (major) && __GNUC_MINOR__ >= (minor) ) ) ) -#endif - -#if !wxUSE_UNICODE -# define wx_wchar char -#else // Unicode - #if (defined(__GNUC__) && !wxCHECK_GCC_VERSION(2, 96)) -# define wx_wchar __WCHAR_TYPE__ - #else // __WCHAR_TYPE__ and gcc < 2.96 - // standard case -# define wx_wchar wchar_t - #endif // __WCHAR_TYPE__ -#endif // ASCII/Unicode - -/* - * interface types etc. - */ - -/* - * regoff_t has to be large enough to hold either off_t or ssize_t, - * and must be signed; it's only a guess that long is suitable. 
- */ -typedef long regoff_t; - -/* - * other interface types - */ - -/* the biggie, a compiled RE (or rather, a front end to same) */ -typedef struct -{ - int re_magic; /* magic number */ - size_t re_nsub; /* number of subexpressions */ - long re_info; /* information about RE */ -#define REG_UBACKREF 000001 -#define REG_ULOOKAHEAD 000002 -#define REG_UBOUNDS 000004 -#define REG_UBRACES 000010 -#define REG_UBSALNUM 000020 -#define REG_UPBOTCH 000040 -#define REG_UBBS 000100 -#define REG_UNONPOSIX 000200 -#define REG_UUNSPEC 000400 -#define REG_UUNPORT 001000 -#define REG_ULOCALE 002000 -#define REG_UEMPTYMATCH 004000 -#define REG_UIMPOSSIBLE 010000 -#define REG_USHORTEST 020000 - int re_csize; /* sizeof(character) */ - char *re_endp; /* backward compatibility kludge */ - /* the rest is opaque pointers to hidden innards */ - char *re_guts; /* `char *' is more portable than `void *' */ - char *re_fns; +/* === regex2.h === */ +typedef off_t regoff_t; +typedef struct { + int re_magic; + size_t re_nsub; /* number of parenthesized subexpressions */ + const char *re_endp; /* end pointer for REG_PEND */ + struct re_guts *re_g; /* none of your business :-) */ } regex_t; - -/* result reporting (may acquire more fields later) */ -typedef struct -{ - regoff_t rm_so; /* start of substring */ - regoff_t rm_eo; /* end of substring */ +typedef struct { + regoff_t rm_so; /* start of match */ + regoff_t rm_eo; /* end of match */ } regmatch_t; -/* supplementary control and reporting */ -typedef struct -{ - regmatch_t rm_extend; /* see REG_EXPECT */ -} rm_detail_t; + +/* === regcomp.c === */ +extern int regcomp(regex_t *, const char *, int); +#define REG_BASIC 0000 +#define REG_EXTENDED 0001 +#define REG_ICASE 0002 +#define REG_NOSUB 0004 +#define REG_NEWLINE 0010 +#define REG_NOSPEC 0020 +#define REG_PEND 0040 +#define REG_DUMP 0200 - -/* - * regex compilation flags - */ -#define REG_BASIC 000000 /* BREs (convenience) */ -#define REG_EXTENDED 000001 /* EREs */ -#define REG_ADVF 000002 
/* advanced features in EREs */ -#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ -#define REG_QUOTE 000004 /* no special characters, none */ -#define REG_NOSPEC REG_QUOTE /* historical synonym */ -#define REG_ICASE 000010 /* ignore case */ -#define REG_NOSUB 000020 /* don't care about subexpressions */ -#define REG_EXPANDED 000040 /* expanded format, white space & comments */ -#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ -#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ -#define REG_NEWLINE 000300 /* newlines are line terminators */ -#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ -#define REG_EXPECT 001000 /* report details on partial/limited - * matches */ -#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ -#define REG_DUMP 004000 /* none of your business :-) */ -#define REG_FAKE 010000 /* none of your business :-) */ -#define REG_PROGRESS 020000 /* none of your business :-) */ - - - -/* - * regex execution flags - */ -#define REG_NOTBOL 0001 /* BOS is not BOL */ -#define REG_NOTEOL 0002 /* EOS is not EOL */ -#define REG_STARTEND 0004 /* backward compatibility kludge */ -#define REG_FTRACE 0010 /* none of your business */ -#define REG_MTRACE 0020 /* none of your business */ -#define REG_SMALL 0040 /* none of your business */ - - -/* - * error reporting - * Be careful if modifying the list of error codes -- the table used by - * regerror() is generated automatically from this file! 
- */ -#define REG_OKAY 0 /* no errors detected */ -#define REG_NOMATCH 1 /* failed to match */ -#define REG_BADPAT 2 /* invalid regexp */ -#define REG_ECOLLATE 3 /* invalid collating element */ -#define REG_ECTYPE 4 /* invalid character class */ -#define REG_EESCAPE 5 /* invalid escape \ sequence */ -#define REG_ESUBREG 6 /* invalid backreference number */ -#define REG_EBRACK 7 /* brackets [] not balanced */ -#define REG_EPAREN 8 /* parentheses () not balanced */ -#define REG_EBRACE 9 /* braces {} not balanced */ -#define REG_BADBR 10 /* invalid repetition count(s) */ -#define REG_ERANGE 11 /* invalid character range */ -#define REG_ESPACE 12 /* out of memory */ -#define REG_BADRPT 13 /* quantifier operand invalid */ -#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ -#define REG_INVARG 16 /* invalid argument to regex function */ -#define REG_MIXED 17 /* character widths of regex and string - * differ */ -#define REG_BADOPT 18 /* invalid embedded option */ -/* two specials for debugging and testing */ -#define REG_ATOI 101 /* convert error-code name to number */ -#define REG_ITOA 102 /* convert error-code number to name */ - - - -/* - * the prototypes for exported functions - */ -extern int wx_regcomp(regex_t *, const wx_wchar *, size_t, int); -extern int regcomp(regex_t *, const wx_wchar *, int); -extern int wx_regexec(regex_t *, const wx_wchar *, size_t, rm_detail_t *, size_t, regmatch_t[], int); -extern int regexec(regex_t *, const wx_wchar *, size_t, regmatch_t[], int); -extern void regfree(regex_t *); +/* === regerror.c === */ +#define REG_OKAY 0 +#define REG_NOMATCH 1 +#define REG_BADPAT 2 +#define REG_ECOLLATE 3 +#define REG_ECTYPE 4 +#define REG_EESCAPE 5 +#define REG_ESUBREG 6 +#define REG_EBRACK 7 +#define REG_EPAREN 8 +#define REG_EBRACE 9 +#define REG_BADBR 10 +#define REG_ERANGE 11 +#define REG_ESPACE 12 +#define REG_BADRPT 13 +#define REG_EMPTY 14 +#define REG_ASSERT 15 +#define REG_INVARG 16 +#define REG_ATOI 255 /* convert name to number 
(!) */ +#define REG_ITOA 0400 /* convert number to name (!) */ extern size_t regerror(int, const regex_t *, char *, size_t); -extern void wx_regfree(regex_t *); -extern size_t wx_regerror(int, const regex_t *, char *, size_t); + + +/* === regexec.c === */ +extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); +#define REG_NOTBOL 00001 +#define REG_NOTEOL 00002 +#define REG_STARTEND 00004 +#define REG_TRACE 00400 /* tracing of execution */ +#define REG_LARGE 01000 /* force large representation */ +#define REG_BACKR 02000 /* force use of backref code */ + + +/* === regfree.c === */ +extern void regfree(regex_t *); #ifdef __cplusplus } #endif - -#endif /* _REGEX_H_ */ +/* ========= end header generated by ./mkh ========= */ +#endif diff --git a/src/regex/regex2.h b/src/regex/regex2.h new file mode 100644 index 0000000000..58fd8d8a43 --- /dev/null +++ b/src/regex/regex2.h @@ -0,0 +1,134 @@ +/* + * First, the stuff that ends up in the outside-world include file + = typedef off_t regoff_t; + = typedef struct { + = int re_magic; + = size_t re_nsub; // number of parenthesized subexpressions + = const char *re_endp; // end pointer for REG_PEND + = struct re_guts *re_g; // none of your business :-) + = } regex_t; + = typedef struct { + = regoff_t rm_so; // start of match + = regoff_t rm_eo; // end of match + = } regmatch_t; + */ +/* + * internals of regex_t + */ +#define MAGIC1 ((('r'^0200)<<8) | 'e') + +/* + * The internal representation is a *strip*, a sequence of + * operators ending with an endmarker. (Some terminology etc. is a + * historical relic of earlier versions which used multiple strips.) + * Certain oddities in the representation are there to permit running + * the machinery backwards; in particular, any deviation from sequential + * flow must be marked at both its source and its destination. Some + * fine points: + * + * - OPLUS_ and O_PLUS are *inside* the loop they create. 
+ * - OQUEST_ and O_QUEST are *outside* the bypass they create. + * - OCH_ and O_CH are *outside* the multi-way branch they create, while + * OOR1 and OOR2 are respectively the end and the beginning of one of + * the branches. Note that there is an implicit OOR2 following OCH_ + * and an implicit OOR1 preceding O_CH. + * + * In state representations, an operator's bit is on to signify a state + * immediately *preceding* "execution" of that operator. + */ +typedef long sop; /* strip operator */ +typedef long sopno; +#define OPRMASK 0x7c000000 +#define OPDMASK 0x03ffffff +#define OPSHIFT (26) +#define OP(n) ((n)&OPRMASK) +#define OPND(n) ((n)&OPDMASK) +#define SOP(op, opnd) ((op)|(opnd)) +/* operators meaning operand */ +/* (back, fwd are offsets) */ +#define OEND (1< uch [csetsize] */ + uch mask; /* bit within array */ + uch hash; /* hash code */ + size_t smultis; + char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ +} cset; +/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ +#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) +#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) +#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) +#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ +#define MCsub(p, cs, cp) mcsub(p, cs, cp) +#define MCin(p, cs, cp) mcin(p, cs, cp) + +/* stuff for character categories */ +typedef unsigned char cat_t; + +/* + * main compiled-expression structure + */ +struct re_guts { + int magic; +# define MAGIC2 ((('R'^0200)<<8)|'E') + sop *strip; /* malloced area for strip */ + int csetsize; /* number of bits in a cset vector */ + int ncsets; /* number of csets in use */ + cset *sets; /* -> cset [ncsets] */ + uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ + int cflags; /* copy of regcomp() cflags argument */ + sopno nstates; /* = number of sops */ + sopno firststate; /* the initial OEND (normally 0) */ + sopno laststate; /* the final OEND */ + int 
iflags; /* internal flags */ +# define USEBOL 01 /* used ^ */ +# define USEEOL 02 /* used $ */ +# define BAD 04 /* something wrong */ + int nbol; /* number of ^ used */ + int neol; /* number of $ used */ + int ncategories; /* how many character categories */ + cat_t *categories; /* ->catspace[-CHAR_MIN] */ + char *must; /* match must contain this string */ + int mlen; /* length of must */ + size_t nsub; /* copy of re_nsub */ + int backrefs; /* does it use back references? */ + sopno nplus; /* how deep does it nest +s? */ + /* catspace must be last */ + cat_t catspace[1]; /* actually [NC] */ +}; + +/* misc utilities */ +#define OUT (CHAR_MAX+1) /* a non-character value */ +#define ISWORD(c) (isalnum(c) || (c) == '_') diff --git a/src/regex/regexec.c b/src/regex/regexec.c index c3a29093f5..d64bc28fcb 100644 --- a/src/regex/regexec.c +++ b/src/regex/regexec.c @@ -1,1089 +1,144 @@ /* - * re_*exec and friends - match REs - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $ + * the outer shell of regexec() * + * This file includes engine.c *twice*, after muchos fiddling with the + * macros that code uses. This lets the same code operate on two different + * representations for state sets. */ +#if defined(__MWERKS__) && !defined(__MACH__) +typedef long off_t ; +#else +#include <sys/types.h> +#endif +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <ctype.h> +#include "regex.h" -#include "regguts.h" +#include "utils.h" +#include "regex2.h" +#ifdef REDEBUG +static int nope = 0; /* for use in asserts; shuts lint up */ +#endif +/* macros for manipulating states, small version */ +#define states unsigned +#define states1 unsigned /* for later use in regexec() decision */ +#define CLEAR(v) ((v) = 0) +#define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned)1 << (n)) +#define ISSET(v, n) ((v) & ((unsigned)1 << (n))) +#define ASSIGN(d, s) ((d) = (s)) +#define EQ(a, b) ((a) == (b)) +#define STATEVARS int dummy /* dummy version */ +#define STATESETUP(m, n) /* nothing */ +#define STATETEARDOWN(m) /* nothing */ +#define SETUP(v) ((v) = 0) +#define onestate unsigned +#define INIT(o, n) ((o) = (unsigned)1 << (n)) +#define INC(o) ((o) <<= 1) +#define ISSTATEIN(v, o) ((v) & (o)) +/* some abbreviations; note that some of these know variable names! 
*/ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) +/* function names */ +#define SNAMES /* engine.c looks after details */ -/* lazy-DFA representation */ -struct arcp -{ /* "pointer" to an outarc */ - struct sset *ss; - color co; -}; +#include "engine.c" -struct sset -{ /* state set */ - unsigned *states; /* pointer to bitvector */ - unsigned hash; /* hash of bitvector */ -#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) -#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ - memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) - int flags; -#define STARTER 01 /* the initial state set */ -#define POSTSTATE 02 /* includes the goal state */ -#define LOCKED 04 /* locked in cache */ -#define NOPROGRESS 010 /* zero-progress state set */ - struct arcp ins; /* chain of inarcs pointing here */ - chr *lastseen; /* last entered on arrival here */ - struct sset **outs; /* outarc vector indexed by color */ - struct arcp *inchain; /* chain-pointer vector for outarcs */ -}; - -struct dfa -{ - int nssets; /* size of cache */ - int nssused; /* how many entries occupied yet */ - int nstates; /* number of states */ - int ncolors; /* length of outarc and inchain vectors */ - int wordsper; /* length of state-set bitvectors */ - struct sset *ssets; /* state-set cache */ - unsigned *statesarea; /* bitvector storage */ - unsigned *work; /* pointer to work area within statesarea */ - struct sset **outsarea; /* outarc-vector storage */ - struct arcp *incarea; /* inchain storage */ - struct cnfa *cnfa; - struct colormap *cm; - chr *lastpost; /* location of last cache-flushed success */ - chr *lastnopr; /* location of last cache-flushed - * NOPROGRESS */ - struct sset *search; /* replacement-search-pointer memory */ - int cptsmalloced; /* were the areas 
individually malloced? */ - char *mallocarea; /* self, or master malloced area, or NULL */ -}; - -#define WORK 1 /* number of work bitvectors needed */ - -/* setup for non-malloc allocation for small cases */ -#define FEWSTATES 20 /* must be less than UBITS */ -#define FEWCOLORS 15 -struct smalldfa -{ - struct dfa dfa; - struct sset ssets[FEWSTATES * 2]; - unsigned statesarea[FEWSTATES * 2 + WORK]; - struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS]; - struct arcp incarea[FEWSTATES * 2 * FEWCOLORS]; -}; - -#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ - - - -/* internal variables, bundled for easy passing around */ -struct vars -{ - regex_t *re; - struct guts *g; - int eflags; /* copies of arguments */ - size_t nmatch; - regmatch_t *pmatch; - rm_detail_t *details; - chr *start; /* start of string */ - chr *stop; /* just past end of string */ - int err; /* error code if any (0 none) */ - regoff_t *mem; /* memory vector for backtracking */ - struct smalldfa dfa1; - struct smalldfa dfa2; -}; - -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? 
(vv)->err : ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return - * it */ -#define OFF(p) ((p) - v->start) -#define LOFF(p) ((long)OFF(p)) +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES +/* macros for manipulating states, large version */ +#define states char * +#define CLEAR(v) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define STATEVARS int vn; char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space == NULL) return(REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate int +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! 
*/ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* function names */ +#define LNAMES /* flag */ +#include "engine.c" /* - * forward declarations + - regexec - interface for matching + = extern int regexec(const regex_t *, const char *, size_t, \ + = regmatch_t [], int); + = #define REG_NOTBOL 00001 + = #define REG_NOTEOL 00002 + = #define REG_STARTEND 00004 + = #define REG_TRACE 00400 // tracing of execution + = #define REG_LARGE 01000 // force large representation + = #define REG_BACKR 02000 // force use of backref code + * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. */ -/* === regexec.c === */ -static int find(struct vars *, struct cnfa *, struct colormap *); -static int cfind(struct vars *, struct cnfa *, struct colormap *); -static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); -static void zapsubs(regmatch_t *, size_t); -static void zapmem(struct vars *, struct subre *); -static void subset(struct vars *, struct subre *, chr *, chr *); -static int dissect(struct vars *, struct subre *, chr *, chr *); -static int condissect(struct vars *, struct subre *, chr *, chr *); -static int altdissect(struct vars *, struct subre *, chr *, chr *); -static int cdissect(struct vars *, struct subre *, chr *, chr *); -static int ccondissect(struct vars *, struct subre *, chr *, chr *); -static int crevdissect(struct vars *, struct subre *, chr *, chr *); -static int cbrdissect(struct vars *, struct subre *, chr *, chr *); -static int caltdissect(struct vars *, struct subre *, chr *, chr *); - -/* === rege_dfa.c === */ -static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); -static chr 
*shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *); -static chr *lastcold(struct vars *, struct dfa *); -static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *); -static void freedfa(struct dfa *); -static unsigned hash(unsigned *, int); -static struct sset *initialize(struct vars *, struct dfa *, chr *); -static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *); -static int lacon(struct vars *, struct cnfa *, chr *, pcolor); -static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); -static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); - - -/* - * regexec - match regular expression - */ -int -regexec(regex_t *re, - const chr *string, - size_t nmatch, - regmatch_t pmatch[], - int flags) +int /* 0 success, REG_NOMATCH failure */ +regexec(preg, string, nmatch, pmatch, eflags) +const regex_t *preg; +const char *string; +size_t nmatch; +regmatch_t pmatch[]; +int eflags; { - rm_detail_t det; - size_t nLen = 0; - chr* s2 = (chr*) string; + register struct re_guts *g = preg->re_g; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#endif - if (string && *string) - { - while(*++s2); - } + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return(REG_BADPAT); + assert(!(g->iflags&BAD)); + if (g->iflags&BAD) /* backstop for no-debug case */ + return(REG_BADPAT); + eflags = GOODFLAGS(eflags); - nLen = ((s2 - string) / sizeof(chr)); - - return wx_regexec(re, string, nLen, &det, nmatch, pmatch, flags); -} -int -wx_regexec(regex_t *re, - const chr *string, - size_t len, - rm_detail_t *details, - size_t nmatch, - regmatch_t pmatch[], - int flags) -{ - struct vars var; - register struct vars *v = &var; - int st; - size_t n; - int backref; - -#define LOCALMAT 20 - regmatch_t mat[LOCALMAT]; - -#define LOCALMEM 40 - regoff_t mem[LOCALMEM]; - - /* sanity checks */ - if (re == NULL || 
string == NULL || re->re_magic != REMAGIC) - return REG_INVARG; - if (re->re_csize != sizeof(chr)) - return REG_MIXED; - - /* setup */ - v->re = re; - v->g = (struct guts *) re->re_guts; - if ((v->g->cflags & REG_EXPECT) && details == NULL) - return REG_INVARG; - if (v->g->info & REG_UIMPOSSIBLE) - return REG_NOMATCH; - backref = (v->g->info & REG_UBACKREF) ? 1 : 0; - v->eflags = flags; - if (v->g->cflags & REG_NOSUB) - nmatch = 0; /* override client */ - v->nmatch = nmatch; - if (backref) - { - /* need work area */ - if (v->g->nsub + 1 <= LOCALMAT) - v->pmatch = mat; - else - v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) * - sizeof(regmatch_t)); - if (v->pmatch == NULL) - return REG_ESPACE; - v->nmatch = v->g->nsub + 1; - } + if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) + return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); else - v->pmatch = pmatch; - v->details = details; - v->start = (chr *) string; - v->stop = (chr *) string + len; - v->err = 0; - if (backref) - { - /* need retry memory */ - assert(v->g->ntree >= 0); - n = (size_t) v->g->ntree; - if (n <= LOCALMEM) - v->mem = mem; - else - v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t)); - if (v->mem == NULL) - { - if (v->pmatch != pmatch && v->pmatch != mat) - FREE(v->pmatch); - return REG_ESPACE; - } - } - else - v->mem = NULL; - - /* do it */ - assert(v->g->tree != NULL); - if (backref) - st = cfind(v, &v->g->tree->cnfa, &v->g->cmap); - else - st = find(v, &v->g->tree->cnfa, &v->g->cmap); - - /* copy (portion of) match vector over if necessary */ - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) - { - zapsubs(pmatch, nmatch); - n = (nmatch < v->nmatch) ? 
nmatch : v->nmatch; - memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t)); - } - - /* clean up */ - if (v->pmatch != pmatch && v->pmatch != mat) - FREE(v->pmatch); - if (v->mem != NULL && v->mem != mem) - FREE(v->mem); - return st; + return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); } - -/* - * find - find a match for the main NFA (no-complications case) - */ -static int -find(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm) -{ - struct dfa *s; - struct dfa *d; - chr *begin; - chr *end = NULL; - chr *cold; - chr *open; /* open and close of range of possible - * starts */ - chr *close; - int hitend; - int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; - - /* first, a shot with the search RE */ - s = newdfa(v, &v->g->search, cm, &v->dfa1); - assert(!(ISERR() && s != NULL)); - NOERR(); - MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); - cold = NULL; - close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL); - freedfa(s); - NOERR(); - if (v->g->cflags & REG_EXPECT) - { - assert(v->details != NULL); - if (cold != NULL) - v->details->rm_extend.rm_so = OFF(cold); - else - v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ - } - if (close == NULL) /* not found */ - return REG_NOMATCH; - if (v->nmatch == 0) /* found, don't need exact location */ - return REG_OKAY; - - /* find starting point and match */ - assert(cold != NULL); - open = cold; - cold = NULL; - MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); - d = newdfa(v, cnfa, cm, &v->dfa1); - assert(!(ISERR() && d != NULL)); - NOERR(); - for (begin = open; begin <= close; begin++) - { - MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); - if (shorter) - end = shortest(v, d, begin, begin, v->stop, - (chr **) NULL, &hitend); - else - end = longest(v, d, begin, v->stop, &hitend); - NOERR(); - if (hitend && cold == NULL) - cold = begin; - if (end != NULL) - break; /* NOTE BREAK OUT */ - } - assert(end != NULL); /* 
search RE succeeded so loop should */ - freedfa(d); - - /* and pin down details */ - assert(v->nmatch > 0); - v->pmatch[0].rm_so = OFF(begin); - v->pmatch[0].rm_eo = OFF(end); - if (v->g->cflags & REG_EXPECT) - { - if (cold != NULL) - v->details->rm_extend.rm_so = OFF(cold); - else - v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ - } - if (v->nmatch == 1) /* no need for submatches */ - return REG_OKAY; - - /* submatches */ - zapsubs(v->pmatch, v->nmatch); - return dissect(v, v->g->tree, begin, end); -} - -/* - * cfind - find a match for the main NFA (with complications) - */ -static int -cfind(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm) -{ - struct dfa *s; - struct dfa *d; - chr *cold; - int ret; - - s = newdfa(v, &v->g->search, cm, &v->dfa1); - NOERR(); - d = newdfa(v, cnfa, cm, &v->dfa2); - if (ISERR()) - { - assert(d == NULL); - freedfa(s); - return v->err; - } - - ret = cfindloop(v, cnfa, cm, d, s, &cold); - - freedfa(d); - freedfa(s); - NOERR(); - if (v->g->cflags & REG_EXPECT) - { - assert(v->details != NULL); - if (cold != NULL) - v->details->rm_extend.rm_so = OFF(cold); - else - v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ - } - return ret; -} - -/* - * cfindloop - the heart of cfind - */ -static int -cfindloop(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm, - struct dfa * d, - struct dfa * s, - chr **coldp) /* where to put coldstart pointer */ -{ - chr *begin; - chr *end; - chr *cold; - chr *open; /* open and close of range of possible - * starts */ - chr *close; - chr *estart; - chr *estop; - int er; - int shorter = v->g->tree->flags & SHORTER; - int hitend; - - assert(d != NULL && s != NULL); - cold = NULL; - close = v->start; - do - { - MDEBUG(("\ncsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); - if (close == NULL) - break; /* NOTE BREAK */ - 
assert(cold != NULL); - open = cold; - cold = NULL; - MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); - for (begin = open; begin <= close; begin++) - { - MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); - estart = begin; - estop = v->stop; - for (;;) - { - if (shorter) - end = shortest(v, d, begin, estart, - estop, (chr **) NULL, &hitend); - else - end = longest(v, d, begin, estop, - &hitend); - if (hitend && cold == NULL) - cold = begin; - if (end == NULL) - break; /* NOTE BREAK OUT */ - MDEBUG(("tentative end %ld\n", LOFF(end))); - zapsubs(v->pmatch, v->nmatch); - zapmem(v, v->g->tree); - er = cdissect(v, v->g->tree, begin, end); - if (er == REG_OKAY) - { - if (v->nmatch > 0) - { - v->pmatch[0].rm_so = OFF(begin); - v->pmatch[0].rm_eo = OFF(end); - } - *coldp = cold; - return REG_OKAY; - } - if (er != REG_NOMATCH) - { - ERR(er); - return er; - } - if ((shorter) ? end == estop : end == begin) - { - /* no point in trying again */ - *coldp = cold; - return REG_NOMATCH; - } - /* go around and try again */ - if (shorter) - estart = end + 1; - else - estop = end - 1; - } - } - } while (close < v->stop); - - *coldp = cold; - return REG_NOMATCH; -} - -/* - * zapsubs - initialize the subexpression matches to "no match" - */ -static void -zapsubs(regmatch_t *p, - size_t n) -{ - size_t i; - - for (i = n - 1; i > 0; i--) - { - p[i].rm_so = -1; - p[i].rm_eo = -1; - } -} - -/* - * zapmem - initialize the retry memory of a subtree to zeros - */ -static void -zapmem(struct vars * v, - struct subre * t) -{ - if (t == NULL) - return; - - assert(v->mem != NULL); - v->mem[t->retry] = 0; - if (t->op == '(') - { - assert(t->subno > 0); - v->pmatch[t->subno].rm_so = -1; - v->pmatch[t->subno].rm_eo = -1; - } - - if (t->left != NULL) - zapmem(v, t->left); - if (t->right != NULL) - zapmem(v, t->right); -} - -/* - * subset - set any subexpression relevant to a successful subre - */ -static void -subset(struct vars * v, - struct subre * sub, - chr *begin, - chr *end) -{ - int 
n = sub->subno; - - assert(n > 0); - if ((size_t) n >= v->nmatch) - return; - - MDEBUG(("setting %d\n", n)); - v->pmatch[n].rm_so = OFF(begin); - v->pmatch[n].rm_eo = OFF(end); -} - -/* - * dissect - determine subexpression matches (uncomplicated case) - */ -static int /* regexec return code */ -dissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - assert(t != NULL); - MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); - - switch (t->op) - { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return altdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - return REG_ASSERT; - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return condissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - subset(v, t, begin, end); - return dissect(v, t->left, begin, end); - break; - default: - return REG_ASSERT; - break; - } -} - -/* - * condissect - determine concatenation subexpression matches (uncomplicated) - */ -static int /* regexec return code */ -condissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int i; - int shorter = (t->left->flags & SHORTER) ? 1 : 0; - chr *stop = (shorter) ? 
end : begin; - - assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - - d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - NOERR(); - d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); - if (ISERR()) - { - assert(d2 == NULL); - freedfa(d); - return v->err; - } - - /* pick a tentative midpoint */ - if (shorter) - mid = shortest(v, d, begin, begin, end, (chr **) NULL, - (int *) NULL); - else - mid = longest(v, d, begin, end, (int *) NULL); - if (mid == NULL) - { - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - - /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end, (int *) NULL) != end) - { - /* that midpoint didn't work, find a new one */ - if (mid == stop) - { - /* all possibilities exhausted! */ - MDEBUG(("no midpoint!\n")); - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - if (shorter) - mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, - (int *) NULL); - else - mid = longest(v, d, begin, mid - 1, (int *) NULL); - if (mid == NULL) - { - /* failed to find a new one! 
*/ - MDEBUG(("failed midpoint!\n")); - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - MDEBUG(("new midpoint %ld\n", LOFF(mid))); - } - - /* satisfaction */ - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - i = dissect(v, t->left, begin, mid); - if (i != REG_OKAY) - return i; - return dissect(v, t->right, mid, end); -} - -/* - * altdissect - determine alternative subexpression matches (uncomplicated) - */ -static int /* regexec return code */ -altdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - struct dfa *d; - int i; - - assert(t != NULL); - assert(t->op == '|'); - - for (i = 0; t != NULL; t = t->right, i++) - { - MDEBUG(("trying %dth\n", i)); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - if (ISERR()) - return v->err; - if (longest(v, d, begin, end, (int *) NULL) == end) - { - MDEBUG(("success\n")); - freedfa(d); - return dissect(v, t->left, begin, end); - } - freedfa(d); - } - return REG_ASSERT; /* none of them matched?!? */ -} - -/* - * cdissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - */ -static int /* regexec return code */ -cdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - int er; - - assert(t != NULL); - MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); - - switch (t->op) - { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return caltdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! 
*/ - assert(t->left == NULL && t->right == NULL); - return cbrdissect(v, t, begin, end); - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return ccondissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); - if (er == REG_OKAY) - subset(v, t, begin, end); - return er; - break; - default: - return REG_ASSERT; - break; - } -} - -/* - * ccondissect - concatenation subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - */ -static int /* regexec return code */ -ccondissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int er; - - assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - - if (t->left->flags & SHORTER) /* reverse scan */ - return crevdissect(v, t, begin, end); - - d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - return v->err; - d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - { - freedfa(d); - return v->err; - } - MDEBUG(("cconcat %d\n", t->retry)); - - /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) - { - mid = longest(v, d, begin, end, (int *) NULL); - if (mid == NULL) - { - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - } - else - { - mid = begin + (v->mem[t->retry] - 1); - MDEBUG(("working midpoint %ld\n", LOFF(mid))); - } - - /* iterate until satisfaction or failure */ - for (;;) - { - /* try this midpoint on for size */ - er = cdissect(v, t->left, begin, mid); - if (er == REG_OKAY && - longest(v, d2, mid, end, (int *) 
NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) - { - freedfa(d); - freedfa(d2); - return er; - } - - /* that midpoint didn't work, find a new one */ - if (mid == begin) - { - /* all possibilities exhausted */ - MDEBUG(("%d no midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - mid = longest(v, d, begin, mid - 1, (int *) NULL); - if (mid == NULL) - { - /* failed to find a new one */ - MDEBUG(("%d failed midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - zapmem(v, t->left); - zapmem(v, t->right); - } - - /* satisfaction */ - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; -} - -/* - * crevdissect - determine backref shortest-first subexpression matches - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". 
- */ -static int /* regexec return code */ -crevdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int er; - - assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(t->left->flags & SHORTER); - - /* concatenation -- need to split the substring between parts */ - d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - return v->err; - d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - { - freedfa(d); - return v->err; - } - MDEBUG(("crev %d\n", t->retry)); - - /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) - { - mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); - if (mid == NULL) - { - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - } - else - { - mid = begin + (v->mem[t->retry] - 1); - MDEBUG(("working midpoint %ld\n", LOFF(mid))); - } - - /* iterate until satisfaction or failure */ - for (;;) - { - /* try this midpoint on for size */ - er = cdissect(v, t->left, begin, mid); - if (er == REG_OKAY && - longest(v, d2, mid, end, (int *) NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) - { - freedfa(d); - freedfa(d2); - return er; - } - - /* that midpoint didn't work, find a new one */ - if (mid == end) - { - /* all possibilities exhausted */ - MDEBUG(("%d no midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); - if (mid == NULL) - { - /* failed to find a new one */ - MDEBUG(("%d failed midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("%d: new midpoint 
%ld\n", t->retry, LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - zapmem(v, t->left); - zapmem(v, t->right); - } - - /* satisfaction */ - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; -} - -/* - * cbrdissect - determine backref subexpression matches - */ -static int /* regexec return code */ -cbrdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - int i; - int n = t->subno; - size_t len; - chr *paren; - chr *p; - chr *stop; - int min = t->min; - int max = t->max; - - assert(t != NULL); - assert(t->op == 'b'); - assert(n >= 0); - assert((size_t) n < v->nmatch); - - MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); - - if (v->pmatch[n].rm_so == -1) - return REG_NOMATCH; - paren = v->start + v->pmatch[n].rm_so; - len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; - - /* no room to maneuver -- retries are pointless */ - if (v->mem[t->retry]) - return REG_NOMATCH; - v->mem[t->retry] = 1; - - /* special-case zero-length string */ - if (len == 0) - { - if (begin == end) - return REG_OKAY; - return REG_NOMATCH; - } - - /* and too-short string */ - assert(end >= begin); - if ((size_t) (end - begin) < len) - return REG_NOMATCH; - stop = end - len; - - /* count occurrences */ - i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) - { - if ((*v->g->compare) (paren, p, len) != 0) - break; - i++; - } - MDEBUG(("cbackref found %d\n", i)); - - /* and sort it out */ - if (p != end) /* didn't consume all of it */ - return REG_NOMATCH; - if (min <= i && (i <= max || max == INFINITY)) - return REG_OKAY; - return REG_NOMATCH; /* out of range */ -} - -/* - * caltdissect - determine alternative subexpression matches (w. 
complications) - */ -static int /* regexec return code */ -caltdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ -{ - struct dfa *d; - int er; - -#define UNTRIED 0 /* not yet tried at all */ -#define TRYING 1 /* top matched, trying submatches */ -#define TRIED 2 /* top didn't match or submatches - * exhausted */ - - if (t == NULL) - return REG_NOMATCH; - assert(t->op == '|'); - if (v->mem[t->retry] == TRIED) - return caltdissect(v, t->right, begin, end); - - MDEBUG(("calt n%d\n", t->retry)); - assert(t->left != NULL); - - if (v->mem[t->retry] == UNTRIED) - { - d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - return v->err; - if (longest(v, d, begin, end, (int *) NULL) != end) - { - freedfa(d); - v->mem[t->retry] = TRIED; - return caltdissect(v, t->right, begin, end); - } - freedfa(d); - MDEBUG(("calt matched\n")); - v->mem[t->retry] = TRYING; - } - - er = cdissect(v, t->left, begin, end); - if (er != REG_NOMATCH) - return er; - - v->mem[t->retry] = TRIED; - return caltdissect(v, t->right, begin, end); -} - - - -#include "rege_dfa.c" diff --git a/src/regex/regfree.c b/src/regex/regfree.c index 569bd204aa..52d09f6e66 100644 --- a/src/regex/regfree.c +++ b/src/regex/regfree.c @@ -1,57 +1,41 @@ -/* - * regfree - free an RE - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. 
- * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regfree.c,v 1.17 2003/08/04 00:43:21 momjian Exp $ - * - * - * You might think that this could be incorporated into regcomp.c, and - * that would be a reasonable idea... except that this is a generic - * function (with a generic name), applicable to all compiled REs - * regardless of the size of their characters, whereas the stuff in - * regcomp.c gets compiled once per character size. - */ - -#include "regguts.h" +#if defined(__MWERKS__) && !defined(__MACH__) +typedef long off_t ; +#else +#include <sys/types.h> +#endif +#include <stdio.h> +#include <stdlib.h> +#include "regex.h" +#include "utils.h" +#include "regex2.h" /* - * pg_regfree - free an RE (generic function, punts to RE-specific function) - * - * Ignoring invocation with NULL is a convenience. 
+ - regfree - free everything + = extern void regfree(regex_t *); */ void -regfree(regex_t *re) -{ wx_regfree(re); } -void -wx_regfree(regex_t *re) +regfree(preg) +regex_t *preg; { - if (re == NULL) + register struct re_guts *g; + + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ return; - (*((struct fns *) re->re_fns)->free) (re); + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free((char *)g->strip); + if (g->sets != NULL) + free((char *)g->sets); + if (g->setbits != NULL) + free((char *)g->setbits); + if (g->must != NULL) + free(g->must); + free((char *)g); } diff --git a/src/regex/regfronts.c b/src/regex/regfronts.c deleted file mode 100644 index 82f48e2abc..0000000000 --- a/src/regex/regfronts.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * regcomp and regexec - front ends to re_ routines - * - * Mostly for implementation of backward-compatibility kludges. Note - * that these routines exist ONLY in char versions. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. 
- * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include "regguts.h" - -/* - - regcomp - compile regular expression - */ -int -regcomp(re, str, flags) -regex_t *re; -CONST char *str; -int flags; -{ - size_t len; - int f = flags; - - if (f®_PEND) { - len = re->re_endp - str; - f &= ~REG_PEND; - } else - len = strlen(str); - - return re_comp(re, str, len, f); -} - -/* - - regexec - execute regular expression - */ -int -regexec(re, str, nmatch, pmatch, flags) -regex_t *re; -CONST char *str; -size_t nmatch; -regmatch_t pmatch[]; -int flags; -{ - CONST char *start; - size_t len; - int f = flags; - - if (f®_STARTEND) { - start = str + pmatch[0].rm_so; - len = pmatch[0].rm_eo - pmatch[0].rm_so; - f &= ~REG_STARTEND; - } else { - start = str; - len = strlen(str); - } - - return re_exec(re, start, len, nmatch, pmatch, f); -} diff --git a/src/regex/regguts.h b/src/regex/regguts.h deleted file mode 100644 index aa12dbf445..0000000000 --- a/src/regex/regguts.h +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Internal interface definitions, etc., for the reg package - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 
- * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ - */ - - - -/* - * Environmental customization. It should not (I hope) be necessary to - * alter the file you are now reading -- regcustom.h should handle it all, - * given care here and elsewhere. - */ -#include "regcustom.h" - - - -/* - * Things that regcustom.h might override. 
- */ - -/* assertions */ -#ifndef assert -#ifndef REG_DEBUG -# ifndef NDEBUG -# define NDEBUG /* no assertions */ -# endif -#endif -#include -#endif - -/* voids */ -#ifndef DISCARD -#define DISCARD void /* for throwing values away */ -#endif -#ifndef VS -#define VS(x) ((void *)(x)) /* cast something to generic ptr */ -#endif - -/* function-pointer declarator */ -#ifndef FUNCPTR -#define FUNCPTR(name, args) (*name) args -#endif - -/* memory allocation */ -#ifndef MALLOC -#define MALLOC(n) malloc(n) -#endif -#ifndef REALLOC -#define REALLOC(p, n) realloc(VS(p), n) -#endif -#ifndef FREE -#define FREE(p) free(VS(p)) -#endif - -/* want size of a char in bits, and max value in bounded quantifiers */ -#ifndef CHAR_BIT -#include -#endif -#ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 /* normally from */ -#endif - - - -/* - * misc - */ - -#define NOTREACHED 0 -#define xxx 1 - -#define DUPMAX _POSIX2_RE_DUP_MAX -#define INFINITY (DUPMAX+1) - -#define REMAGIC 0xfed7 /* magic number for main struct */ - - - -/* - * debugging facilities - */ -#ifdef REG_DEBUG -/* FDEBUG does finite-state tracing */ -#define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } -/* MDEBUG does higher-level tracing */ -#define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } -#else -#define FDEBUG(arglist) {} -#define MDEBUG(arglist) {} -#endif - - - -/* - * bitmap manipulation - */ -#define UBITS (CHAR_BIT * sizeof(unsigned)) -#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) -#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) - - - -/* - * We dissect a chr into byts for colormap table indexing. Here we define - * a byt, which will be the same as a byte on most machines... The exact - * size of a byt is not critical, but about 8 bits is good, and extraction - * of 8-bit chunks is sometimes especially fast. 
- */ -#ifndef BYTBITS -#define BYTBITS 8 /* bits in a byt */ -#endif -#define BYTTAB (1<flags&FREECOL) - union tree *block; /* block of solid color, if any */ -}; - -/* the color map itself */ -struct colormap -{ - int magic; -#define CMMAGIC 0x876 - struct vars *v; /* for compile error reporting */ - size_t ncds; /* number of colordescs */ - size_t max; /* highest in use */ - color free; /* beginning of free chain (if non-0) */ - struct colordesc *cd; -#define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) -#define NINLINECDS ((size_t)10) - struct colordesc cdspace[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus fill blocks */ -}; - -/* optimization magic to do fast chr->color mapping */ -#define B0(c) ((c) & BYTMASK) -#define B1(c) (((c)>>BYTBITS) & BYTMASK) -#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) -#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) -#if NBYTS == 1 -#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) -#endif -/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ -#if NBYTS == 2 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) -#endif -#if NBYTS == 4 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) -#endif - - - -/* - * Interface definitions for locale-interface functions in locale.c. - * Multi-character collating elements (MCCEs) cause most of the trouble. 
- */ -struct cvec -{ - int nchrs; /* number of chrs */ - int chrspace; /* number of chrs possible */ - chr *chrs; /* pointer to vector of chrs */ - int nranges; /* number of ranges (chr pairs) */ - int rangespace; /* number of chrs possible */ - chr *ranges; /* pointer to vector of chr pairs */ - int nmcces; /* number of MCCEs */ - int mccespace; /* number of MCCEs possible */ - int nmccechrs; /* number of chrs used for MCCEs */ - chr *mcces[1]; /* pointers to 0-terminated MCCEs */ - /* and both batches of chrs are on the end */ -}; - -/* caution: this value cannot be changed easily */ -#define MAXMCCE 2 /* length of longest MCCE */ - - - -/* - * definitions for NFA internal representation - * - * Having a "from" pointer within each arc may seem redundant, but it - * saves a lot of hassle. - */ -struct state; - -struct arc -{ - int type; -#define ARCFREE '\0' - color co; - struct state *from; /* where it's from (and contained within) */ - struct state *to; /* where it's to */ - struct arc *outchain; /* *from's outs chain or free chain */ -#define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ -}; - -struct arcbatch -{ /* for bulk allocation of arcs */ - struct arcbatch *next; -#define ABSIZE 10 - struct arc a[ABSIZE]; -}; - -struct state -{ - int no; -#define FREESTATE (-1) - char flag; /* marks special states */ - int nins; /* number of inarcs */ - struct arc *ins; /* chain of inarcs */ - int nouts; /* number of outarcs */ - struct arc *outs; /* chain of outarcs */ - struct arc *free; /* chain of free arcs */ - struct state *tmp; /* temporary for traversal algorithms */ - struct state *next; /* chain for traversing all */ - struct state *prev; /* back chain */ - struct arcbatch oas; /* first arcbatch, avoid malloc in easy - * case */ - int noas; /* number of arcs used in first arcbatch */ -}; - -struct nfa -{ - struct state *pre; /* pre-initial state */ - struct state *init; /* initial state */ - 
struct state *final; /* final state */ - struct state *post; /* post-final state */ - int nstates; /* for numbering states */ - struct state *states; /* state-chain header */ - struct state *slast; /* tail of the chain */ - struct state *free; /* free list */ - struct colormap *cm; /* the color map */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct vars *v; /* simplifies compile error reporting */ - struct nfa *parent; /* parent NFA, if any */ -}; - - - -/* - * definitions for compacted NFA - */ -struct carc -{ - color co; /* COLORLESS is list terminator */ - int to; /* state number */ -}; - -struct cnfa -{ - int nstates; /* number of states */ - int ncolors; /* number of colors */ - int flags; -#define HASLACONS 01 /* uses lookahead constraints */ - int pre; /* setup state number */ - int post; /* teardown state number */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct carc **states; /* vector of pointers to outarc lists */ - struct carc *arcs; /* the area for the lists */ -}; - -#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) -#define NULLCNFA(cnfa) ((cnfa).nstates == 0) - - - -/* - * subexpression tree - */ -struct subre -{ - char op; /* '|', '.' (concat), 'b' (backref), '(', - * '=' */ - char flags; -#define LONGER 01 /* prefers longer match */ -#define SHORTER 02 /* prefers shorter match */ -#define MIXED 04 /* mixed preference below */ -#define CAP 010 /* capturing parens below */ -#define BACKR 020 /* back reference below */ -#define INUSE 0100 /* in use in final tree */ -#define LOCAL 03 /* bits which may not propagate up */ -#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ -#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ -#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) -#define MESSY(f) ((f)&(MIXED|CAP|BACKR)) -#define PREF(f) ((f)&LOCAL) -#define PREF2(f1, f2) ((PREF(f1) != 0) ? 
PREF(f1) : PREF(f2)) -#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) - short retry; /* index into retry memory */ - int subno; /* subexpression number (for 'b' and '(') */ - short min; /* min repetitions, for backref only */ - short max; /* max repetitions, for backref only */ - struct subre *left; /* left child, if any (also freelist - * chain) */ - struct subre *right; /* right child, if any */ - struct state *begin; /* outarcs from here... */ - struct state *end; /* ...ending in inarcs here */ - struct cnfa cnfa; /* compacted NFA, if any */ - struct subre *chain; /* for bookkeeping and error cleanup */ -}; - - - -/* - * table of function pointers for generic manipulation functions - * A regex_t's re_fns points to one of these. - */ -struct fns -{ - void FUNCPTR(free, (regex_t *)); -}; - - - -/* - * the insides of a regex_t, hidden behind a void * - */ -struct guts -{ - int magic; -#define GUTSMAGIC 0xfed9 - int cflags; /* copy of compile flags */ - long info; /* copy of re_info */ - size_t nsub; /* copy of re_nsub */ - struct subre *tree; - struct cnfa search; /* for fast preliminary search */ - int ntree; - struct colormap cmap; - int FUNCPTR(compare, (const chr *, const chr *, size_t)); - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ -}; diff --git a/src/regex/split.c b/src/regex/split.c new file mode 100644 index 0000000000..188bdb775b --- /dev/null +++ b/src/regex/split.c @@ -0,0 +1,316 @@ +#include +#include + +/* + - split - divide a string into fields, like awk split() + = int split(char *string, char *fields[], int nfields, char *sep); + */ +int /* number of fields, including overflow */ +split(string, fields, nfields, sep) +char *string; +char *fields[]; /* list is not NULL-terminated */ +int nfields; /* number of entries available in fields[] */ +char *sep; /* "" white, "c" single char, "ab" [ab]+ */ +{ + register char *p = string; + register char c; /* latest character */ + register char sepc = 
sep[0]; + register char sepc2; + register int fn; + register char **fp = fields; + register char *sepp; + register int trimtrail; + + /* white space */ + if (sepc == '\0') { + while ((c = *p++) == ' ' || c == '\t') + continue; + p--; + trimtrail = 1; + sep = " \t"; /* note, code below knows this is 2 long */ + sepc = ' '; + } else + trimtrail = 0; + sepc2 = sep[1]; /* now we can safely pick this up */ + + /* catch empties */ + if (*p == '\0') + return(0); + + /* single separator */ + if (sepc2 == '\0') { + fn = nfields; + for (;;) { + *fp++ = p; + fn--; + if (fn == 0) + break; + while ((c = *p++) != sepc) + if (c == '\0') + return(nfields - fn); + *(p-1) = '\0'; + } + /* we have overflowed the fields vector -- just count them */ + fn = nfields; + for (;;) { + while ((c = *p++) != sepc) + if (c == '\0') + return(fn); + fn++; + } + /* not reached */ + } + + /* two separators */ + if (sep[2] == '\0') { + fn = nfields; + for (;;) { + *fp++ = p; + fn--; + while ((c = *p++) != sepc && c != sepc2) + if (c == '\0') { + if (trimtrail && **(fp-1) == '\0') + fn++; + return(nfields - fn); + } + if (fn == 0) + break; + *(p-1) = '\0'; + while ((c = *p++) == sepc || c == sepc2) + continue; + p--; + } + /* we have overflowed the fields vector -- just count them */ + fn = nfields; + while (c != '\0') { + while ((c = *p++) == sepc || c == sepc2) + continue; + p--; + fn++; + while ((c = *p++) != '\0' && c != sepc && c != sepc2) + continue; + } + /* might have to trim trailing white space */ + if (trimtrail) { + p--; + while ((c = *--p) == sepc || c == sepc2) + continue; + p++; + if (*p != '\0') { + if (fn == nfields+1) + *p = '\0'; + fn--; + } + } + return(fn); + } + + /* n separators */ + fn = 0; + for (;;) { + if (fn < nfields) + *fp++ = p; + fn++; + for (;;) { + c = *p++; + if (c == '\0') + return(fn); + sepp = sep; + while ((sepc = *sepp++) != '\0' && sepc != c) + continue; + if (sepc != '\0') /* it was a separator */ + break; + } + if (fn < nfields) + *(p-1) = '\0'; + for (;;) { 
+ c = *p++; + sepp = sep; + while ((sepc = *sepp++) != '\0' && sepc != c) + continue; + if (sepc == '\0') /* it wasn't a separator */ + break; + } + p--; + } + + /* not reached */ +} + +#ifdef TEST_SPLIT + + +/* + * test program + * pgm runs regression + * pgm sep splits stdin lines by sep + * pgm str sep splits str by sep + * pgm str sep n splits str by sep n times + */ +int +main(argc, argv) +int argc; +char *argv[]; +{ + char buf[512]; + register int n; +# define MNF 10 + char *fields[MNF]; + + if (argc > 4) + for (n = atoi(argv[3]); n > 0; n--) { + (void) strcpy(buf, argv[1]); + } + else if (argc > 3) + for (n = atoi(argv[3]); n > 0; n--) { + (void) strcpy(buf, argv[1]); + (void) split(buf, fields, MNF, argv[2]); + } + else if (argc > 2) + dosplit(argv[1], argv[2]); + else if (argc > 1) + while (fgets(buf, sizeof(buf), stdin) != NULL) { + buf[strlen(buf)-1] = '\0'; /* stomp newline */ + dosplit(buf, argv[1]); + } + else + regress(); + + exit(0); +} + +dosplit(string, seps) +char *string; +char *seps; +{ +# define NF 5 + char *fields[NF]; + register int nf; + + nf = split(string, fields, NF, seps); + print(nf, NF, fields); +} + +print(nf, nfp, fields) +int nf; +int nfp; +char *fields[]; +{ + register int fn; + register int bound; + + bound = (nf > nfp) ? nfp : nf; + printf("%d:\t", nf); + for (fn = 0; fn < bound; fn++) + printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? 
", " : "\n"); +} + +#define RNF 5 /* some table entries know this */ +struct { + char *str; + char *seps; + int nf; + char *fi[RNF]; +} tests[] = { + "", " ", 0, { "" }, + " ", " ", 2, { "", "" }, + "x", " ", 1, { "x" }, + "xy", " ", 1, { "xy" }, + "x y", " ", 2, { "x", "y" }, + "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, + " a bcd", " ", 4, { "", "", "a", "bcd" }, + "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, + " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, + + "", " _", 0, { "" }, + " ", " _", 2, { "", "" }, + "x", " _", 1, { "x" }, + "x y", " _", 2, { "x", "y" }, + "ab _ cd", " _", 2, { "ab", "cd" }, + " a_b c ", " _", 5, { "", "a", "b", "c", "" }, + "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, + " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, + + "", " _~", 0, { "" }, + " ", " _~", 2, { "", "" }, + "x", " _~", 1, { "x" }, + "x y", " _~", 2, { "x", "y" }, + "ab _~ cd", " _~", 2, { "ab", "cd" }, + " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, + "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, + "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, + + "", " _~-", 0, { "" }, + " ", " _~-", 2, { "", "" }, + "x", " _~-", 1, { "x" }, + "x y", " _~-", 2, { "x", "y" }, + "ab _~- cd", " _~-", 2, { "ab", "cd" }, + " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, + "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, + "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, + + "", " ", 0, { "" }, + " ", " ", 2, { "", "" }, + "x", " ", 1, { "x" }, + "xy", " ", 1, { "xy" }, + "x y", " ", 2, { "x", "y" }, + "abc def g ", " ", 4, { "abc", "def", "g", "" }, + " a bcd", " ", 3, { "", "a", "bcd" }, + "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, + " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, + + "", "", 0, { "" }, + " ", "", 0, { "" }, + "x", "", 1, { "x" }, + "xy", "", 1, { "xy" }, + "x y", "", 2, { "x", "y" }, + "abc def g ", "", 3, { "abc", "def", "g" }, + "\t a bcd", "", 2, { "a", "bcd" }, + " a \tb\t c ", "", 3, { 
"a", "b", "c" }, + "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, + "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, + " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, + + NULL, NULL, 0, { NULL }, +}; + +regress() +{ + char buf[512]; + register int n; + char *fields[RNF+1]; + register int nf; + register int i; + register int printit; + register char *f; + + for (n = 0; tests[n].str != NULL; n++) { + (void) strcpy(buf, tests[n].str); + fields[RNF] = NULL; + nf = split(buf, fields, RNF, tests[n].seps); + printit = 0; + if (nf != tests[n].nf) { + printf("split `%s' by `%s' gave %d fields, not %d\n", + tests[n].str, tests[n].seps, nf, tests[n].nf); + printit = 1; + } else if (fields[RNF] != NULL) { + printf("split() went beyond array end\n"); + printit = 1; + } else { + for (i = 0; i < nf && i < RNF; i++) { + f = fields[i]; + if (f == NULL) + f = "(NULL)"; + if (strcmp(f, tests[n].fi[i]) != 0) { + printf("split `%s' by `%s', field %d is `%s', not `%s'\n", + tests[n].str, tests[n].seps, + i, fields[i], tests[n].fi[i]); + printit = 1; + } + } + } + if (printit) + print(nf, RNF, fields); + } +} +#endif diff --git a/src/regex/tclUniData.c b/src/regex/tclUniData.c deleted file mode 100644 index 9f0c6e05ae..0000000000 --- a/src/regex/tclUniData.c +++ /dev/null @@ -1,904 +0,0 @@ -/* - * tclUniData.c -- - * - * Declarations of Unicode character information tables. This file is - * automatically generated by the tools/uniParse.tcl script. Do not - * modify this file by hand. - * - * Copyright (c) 1998 by Scriptics Corporation. - * All rights reserved. - * - * RCS: @(#) $Id$ - */ - -/* - * A 16-bit Unicode character is split into two parts in order to index - * into the following tables. The lower OFFSET_BITS comprise an offset - * into a page of characters. The upper bits comprise the page number. - */ - -#define OFFSET_BITS 5 - -/* - * The pageMap is indexed by page number and returns an alternate page number - * that identifies a unique page of characters. 
Many Unicode characters map - * to the same alternate page number. - */ - -static unsigned char pageMap[] = { - 0, 1, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 7, 15, 16, 17, - 18, 19, 20, 21, 22, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 7, 32, - 7, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 47, - 48, 49, 50, 51, 52, 35, 47, 53, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 58, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 80, 81, - 84, 85, 80, 86, 87, 88, 89, 90, 91, 92, 35, 93, 94, 95, 35, 96, 97, - 98, 99, 100, 101, 102, 35, 47, 103, 104, 35, 35, 105, 106, 107, 47, - 47, 108, 47, 47, 109, 47, 110, 111, 47, 112, 47, 113, 114, 115, 116, - 114, 47, 117, 118, 35, 47, 47, 119, 90, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 120, 121, 47, 47, 122, - 35, 35, 35, 35, 47, 123, 124, 125, 126, 47, 127, 128, 47, 129, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 7, 7, 7, 7, 130, 7, 7, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, - 149, 150, 151, 152, 153, 154, 155, 156, 156, 156, 156, 156, 156, 156, - 157, 158, 159, 160, 161, 162, 35, 35, 35, 160, 163, 164, 165, 166, - 167, 168, 169, 160, 160, 160, 160, 170, 171, 172, 173, 174, 160, 160, - 175, 35, 35, 35, 35, 176, 177, 178, 179, 180, 181, 35, 35, 160, 160, - 160, 160, 160, 160, 160, 160, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 182, 160, 160, 155, 160, 160, 160, 160, 160, 160, 170, 183, 184, 185, - 90, 47, 186, 90, 47, 187, 188, 189, 47, 47, 190, 128, 35, 35, 191, - 192, 193, 194, 192, 195, 196, 197, 160, 160, 160, 198, 160, 160, 199, - 197, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 
47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 200, 35, 35, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 201, 35, 35, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 202, 203, 204, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 
47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 205, 35, 35, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 
207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 47, 47, 47, 47, 47, 47, 47, 47, 47, 208, 35, 35, 35, 35, - 35, 35, 209, 210, 211, 47, 47, 212, 213, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 214, 215, 47, 216, 47, 217, 218, 35, 219, 220, 221, 47, - 47, 47, 222, 223, 2, 224, 225, 226, 227, 228, 229, 230, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 231, 35, 232, 233, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 208, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 
35, 47, 234, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 
35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 235, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 236, 207, 207, 207, 207, 207, 207, 207, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 
35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35 -}; - -/* - * The groupMap is indexed by combining the alternate page number with - * the page offset and returns a group number that identifies a unique - * set of character attributes. 
- */ - -static unsigned char groupMap[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8, - 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 3, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 5, 3, 6, 11, 12, 11, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 5, 7, 6, 7, 1, 2, 3, 4, 4, 4, 4, 14, 14, 11, 14, 15, 16, - 7, 8, 14, 11, 14, 7, 17, 17, 11, 18, 14, 3, 11, 17, 15, 19, 17, 17, - 17, 3, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 7, 10, 10, 10, 10, 10, 10, 10, 15, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 7, 13, 13, 13, 13, 13, 13, 13, 20, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 23, 24, 21, 22, 21, - 22, 21, 22, 15, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 15, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 25, - 21, 22, 21, 22, 21, 22, 26, 15, 27, 21, 22, 21, 22, 28, 21, 22, 29, - 29, 21, 22, 15, 30, 31, 32, 21, 22, 29, 33, 34, 35, 36, 21, 22, 15, - 15, 35, 37, 15, 38, 21, 22, 21, 22, 21, 22, 39, 21, 22, 39, 15, 15, - 21, 22, 39, 21, 22, 40, 40, 21, 22, 21, 22, 41, 21, 22, 15, 42, 21, - 22, 15, 43, 42, 42, 42, 42, 44, 45, 46, 44, 45, 46, 44, 45, 46, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 47, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 15, 44, 45, 46, 21, 22, 48, 49, 21, 22, 21, 22, 21, 22, 21, 22, 0, - 0, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 
22, - 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 50, 51, 15, 52, 52, 15, 53, 15, - 54, 15, 15, 15, 15, 52, 15, 15, 55, 15, 15, 15, 15, 56, 57, 15, 15, - 15, 15, 15, 57, 15, 15, 58, 15, 15, 59, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 60, 15, 15, 60, 15, 15, 15, 15, 60, 15, 61, 61, 15, 15, - 15, 15, 15, 15, 62, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 63, - 63, 63, 63, 63, 63, 63, 63, 63, 11, 11, 63, 63, 63, 63, 63, 63, 63, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 63, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 63, 63, 63, - 63, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, - 0, 0, 0, 0, 63, 0, 0, 0, 3, 0, 0, 0, 0, 0, 11, 11, 66, 3, 67, 67, 67, - 0, 68, 0, 69, 69, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 70, 71, - 71, 71, 15, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 72, 13, 13, 13, 13, 13, 13, 13, 13, 13, 73, 74, 74, 0, - 75, 76, 77, 77, 77, 78, 79, 15, 0, 0, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 80, 81, 47, - 15, 82, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 84, 84, 84, 84, 84, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 81, 81, 
81, 81, 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 21, 22, 14, 64, 64, 64, 64, 0, 85, 85, 0, 0, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 77, 21, 22, 21, 22, 0, 0, 21, 22, 0, 0, 21, 22, 0, 0, 0, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 0, 0, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, - 0, 0, 63, 3, 3, 3, 3, 3, 3, 0, 87, 87, 87, 87, 87, 87, 87, 87, 87, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 15, 0, 3, 8, 0, 0, - 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 3, 64, 3, 64, - 64, 3, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 0, 0, 0, 0, 42, 42, 42, 3, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 0, 0, 0, 0, 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 0, 0, 64, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 
42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 42, 64, - 64, 64, 64, 64, 64, 64, 85, 85, 64, 64, 64, 64, 64, 64, 63, 63, 64, - 64, 14, 64, 64, 64, 64, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 42, 42, - 42, 14, 14, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 88, 42, - 64, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, - 64, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 0, 0, 64, 42, 89, 89, 89, 64, 64, 64, 64, 64, 64, - 64, 64, 89, 89, 89, 89, 64, 0, 0, 42, 64, 64, 64, 64, 0, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 64, 64, 3, 3, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, - 89, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42, 0, 0, 0, 42, - 42, 42, 42, 0, 0, 64, 0, 89, 89, 89, 64, 64, 64, 64, 0, 0, 89, 89, - 0, 0, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 42, 42, - 0, 42, 42, 42, 64, 64, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 42, 42, - 4, 4, 17, 17, 17, 17, 17, 17, 14, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 42, - 42, 42, 42, 42, 42, 0, 0, 0, 0, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, - 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 0, 42, 42, 0, 42, 42, 0, 0, - 64, 0, 89, 89, 89, 64, 64, 0, 0, 0, 0, 64, 64, 0, 0, 64, 64, 64, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 
42, 42, 42, 0, 42, 0, 0, 0, 0, 0, - 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 64, 64, 42, 42, 42, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 89, 0, 42, 42, 42, 42, 42, 42, 42, - 0, 42, 0, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, - 42, 42, 0, 42, 42, 0, 42, 42, 42, 42, 42, 0, 0, 64, 42, 89, 89, 89, - 64, 64, 64, 64, 64, 0, 64, 64, 89, 0, 89, 89, 64, 0, 0, 42, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42, - 42, 0, 0, 42, 42, 42, 42, 0, 0, 64, 42, 89, 64, 89, 64, 64, 64, 0, - 0, 0, 89, 89, 0, 0, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 64, 89, 0, - 0, 0, 0, 42, 42, 0, 42, 42, 42, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 89, - 0, 42, 42, 42, 42, 42, 42, 0, 0, 0, 42, 42, 42, 0, 42, 42, 42, 42, - 0, 0, 0, 42, 42, 0, 42, 0, 42, 42, 0, 0, 0, 42, 42, 0, 0, 0, 42, 42, - 42, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 0, 0, 0, - 0, 89, 89, 64, 89, 89, 0, 0, 0, 89, 89, 89, 0, 89, 89, 89, 64, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 89, 89, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, - 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 89, 89, - 89, 89, 0, 64, 64, 64, 0, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 64, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, - 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 0, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 
- 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, - 42, 42, 0, 0, 0, 0, 89, 64, 89, 89, 89, 89, 89, 0, 64, 89, 89, 0, 89, - 89, 64, 64, 0, 0, 0, 0, 0, 0, 0, 89, 89, 0, 0, 0, 0, 0, 0, 0, 42, 0, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 89, 89, 89, 64, 64, - 64, 0, 0, 89, 89, 89, 0, 89, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 0, 0, - 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 64, 0, 0, 0, 0, 89, 89, 89, 64, - 64, 64, 0, 64, 0, 89, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 64, 42, 42, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 4, 42, 42, - 42, 42, 42, 42, 63, 64, 64, 64, 64, 64, 64, 64, 64, 3, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 3, 3, 0, 0, 0, 0, 0, 42, 42, 0, 42, 0, 0, 42, 42, - 0, 42, 0, 0, 42, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 0, 42, 42, 42, 42, - 42, 42, 42, 0, 42, 42, 42, 0, 42, 0, 42, 0, 0, 42, 42, 0, 42, 42, 42, - 42, 64, 42, 42, 64, 64, 64, 64, 64, 64, 0, 64, 64, 42, 0, 0, 42, 42, - 42, 42, 42, 0, 63, 0, 64, 64, 64, 64, 64, 64, 0, 0, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 0, 0, 42, 42, 0, 0, 42, 14, 14, 14, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 14, 14, 14, 14, 14, 64, 64, 14, 14, 14, - 14, 14, 14, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 14, 64, 14, 64, 14, 64, 5, 6, 5, 6, 89, 89, 42, 42, 42, - 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 89, 64, 64, 64, 64, 64, 3, 64, 64, 42, - 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 0, 14, 14, 14, 14, 14, 14, 14, 14, 64, 14, 14, 14, 14, 14, 14, 0, 0, - 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 42, - 42, 42, 42, 42, 0, 42, 42, 0, 89, 64, 64, 64, 64, 89, 64, 0, 0, 0, - 64, 64, 89, 64, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, - 3, 3, 3, 3, 3, 42, 42, 42, 42, 42, 42, 89, 89, 64, 64, 0, 0, 0, 0, - 0, 0, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 0, 0, 0, 0, 3, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, - 0, 0, 0, 0, 42, 42, 42, 42, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, - 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42, - 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 
- 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 0, - 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, - 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 3, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 5, 6, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 3, 3, 3, 90, 90, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 89, 89, 89, 64, 64, 64, 64, 64, 64, 64, 89, 89, 89, 89, 89, - 89, 89, 89, 64, 89, 89, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 3, 3, 3, 3, 3, 3, 3, 4, 3, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, - 3, 3, 3, 3, 8, 3, 3, 3, 3, 88, 88, 88, 88, 0, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 0, 0, 0, 0, 0, 0, 42, 42, 42, 63, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, - 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 64, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 15, 15, - 15, 15, 15, 91, 0, 0, 0, 0, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 
21, 22, 21, 22, 21, 22, 21, 22, 0, - 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, - 93, 93, 93, 92, 92, 92, 92, 92, 92, 0, 0, 93, 93, 93, 93, 93, 93, 0, - 0, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, - 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 92, - 92, 92, 92, 92, 92, 0, 0, 93, 93, 93, 93, 93, 93, 0, 0, 15, 92, 15, - 92, 15, 92, 15, 92, 0, 93, 0, 93, 0, 93, 0, 93, 92, 92, 92, 92, 92, - 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 94, 94, 95, 95, 95, 95, - 96, 96, 97, 97, 98, 98, 99, 99, 0, 0, 92, 92, 92, 92, 92, 92, 92, 92, - 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 92, 92, 92, 92, 92, - 92, 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 92, 92, 92, 92, - 92, 92, 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 15, 101, 15, - 0, 15, 15, 93, 93, 102, 102, 103, 11, 104, 11, 11, 11, 15, 101, 15, - 0, 15, 15, 105, 105, 105, 105, 103, 11, 11, 11, 92, 92, 15, 15, 0, - 0, 15, 15, 93, 93, 106, 106, 0, 11, 11, 11, 92, 92, 15, 15, 15, 107, - 15, 15, 93, 93, 108, 108, 109, 11, 11, 11, 0, 0, 15, 101, 15, 0, 15, - 15, 110, 110, 111, 111, 103, 11, 11, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 88, 88, 88, 88, 8, 8, 8, 8, 8, 8, 3, 3, 16, 19, 5, 16, 16, - 19, 5, 16, 3, 3, 3, 3, 3, 3, 3, 3, 112, 113, 88, 88, 88, 88, 88, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 16, 19, 3, 3, 3, 3, 12, 12, 3, 3, 3, 7, - 5, 6, 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 88, 88, 88, 88, 88, 17, - 0, 0, 0, 17, 17, 17, 17, 17, 17, 7, 7, 7, 5, 6, 15, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 7, 7, 7, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 85, 85, 85, 85, 64, 85, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 
77, - 14, 14, 14, 14, 77, 14, 14, 15, 77, 77, 77, 15, 15, 77, 77, 77, 15, - 14, 77, 14, 14, 14, 77, 77, 77, 77, 77, 14, 14, 14, 14, 14, 14, 77, - 14, 114, 14, 77, 14, 115, 116, 77, 77, 14, 15, 77, 77, 14, 77, 15, - 42, 42, 42, 42, 15, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, - 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 118, 118, 118, 118, - 118, 118, 118, 118, 118, 118, 118, 90, 90, 90, 90, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 14, 14, 14, 14, 14, 7, 7, 14, 14, - 14, 14, 7, 14, 14, 7, 14, 14, 7, 14, 14, 14, 14, 14, 14, 14, 7, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 7, 7, 14, 14, 7, - 14, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 7, 7, 7, 7, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 7, 7, 14, 14, 14, 14, 14, 14, 14, 5, 6, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 119, 119, 119, 119, 119, 119, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 119, 119, 119, 119, 119, 119, 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 7, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 7, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 0, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 0, 14, 0, 14, 14, 14, 14, 0, 0, 0, 14, 0, 14, 14, - 14, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, - 0, 0, 0, 2, 3, 3, 3, 14, 63, 42, 90, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, - 14, 14, 5, 6, 5, 6, 5, 6, 5, 6, 8, 5, 6, 6, 14, 90, 90, 90, 90, 90, - 90, 90, 90, 90, 64, 64, 64, 64, 64, 64, 8, 63, 63, 63, 63, 63, 14, - 14, 90, 90, 90, 0, 0, 0, 14, 14, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, - 11, 11, 63, 63, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 12, 63, - 63, 63, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 14, 14, 17, 17, 17, - 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 
0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 0, 14, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, - 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 0, - 0, 0, 0, 0, 42, 64, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 7, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, - 42, 0, 42, 0, 42, 42, 0, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 
42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, 8, 12, 12, 5, 6, 5, 6, 5, - 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 0, 0, 0, 0, 3, 3, 3, 3, 12, 12, 12, - 3, 3, 3, 0, 3, 3, 3, 3, 8, 5, 6, 5, 6, 5, 6, 3, 3, 3, 7, 8, 7, 7, 7, - 0, 3, 4, 3, 3, 0, 0, 0, 0, 42, 42, 42, 0, 42, 0, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 0, 0, 88, 0, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8, 3, 3, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 11, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 5, 7, 6, 7, 0, 0, 3, 5, 6, 3, 12, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 63, - 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, - 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, - 42, 42, 
42, 42, 0, 0, 42, 42, 42, 0, 0, 0, 4, 4, 7, 11, 14, 4, 4, 0, - 14, 7, 7, 7, 7, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 88, 88, 14, - 14, 42, 17, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 123, 123, - 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 89, 64, 14, 14, 14, - 14, 14, 0, 0, 77, 77, 15, 15, 77, 15, 15, 77, 77, 15, 77, 77, 15, 77, - 77, 15, 15, 77, 15, 15, 77, 77, 15, 77, 77, 15, 77, 77, 15, 15, 77, - 15, 15, 77, 77, 15, 77, 77, 15, 77, 77, 15, 15, 77, 77, 15, 15, 77, - 15, 15, 77, 77, 15, 15, 77, 15, 15, 77, 77, 15, 15, 9, 9, 9, 42, 42, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 88, 0, 88, 88, 88, 88, 88, 88, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122 -}; - -/* - * Each group represents a unique set of character attributes. The attributes - * are encoded into a 32-bit value as follows: - * - * Bits 0-4 Character category: see the constants listed below. - * - * Bits 5-7 Case delta type: 000 = identity - * 010 = add delta for lower - * 011 = add delta for lower, add 1 for title - * 100 = sutract delta for title/upper - * 101 = sub delta for upper, sub 1 for title - * 110 = sub delta for upper, add delta for lower - * - * Bits 8-21 Reserved for future use. - * - * Bits 22-31 Case delta: delta for case conversions. This should be the - * highest field so we can easily sign extend. 
- */ - -static int groups[] = { - 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 134217793, 28, 19, 134217858, - 29, 2, 23, 11, 1178599554, 24, -507510654, 4194369, 4194434, -834666431, - 973078658, -507510719, 1258291330, 880803905, 864026689, 859832385, - 331350081, 847249473, 851443777, 868220993, -406847358, 884998209, - 876609601, 893386817, 897581121, 914358337, 910164033, 918552641, - 5, -234880894, 8388705, 4194499, 8388770, 331350146, -406847423, - -234880959, 880803970, 864026754, 859832450, 847249538, 851443842, - 868221058, 876609666, 884998274, 893386882, 897581186, 914358402, - 910164098, 918552706, 4, 6, -352321402, 159383617, 155189313, - 268435521, 264241217, 159383682, 155189378, 130023554, 268435586, - 264241282, 260046978, 239075458, 1, 197132418, 226492546, 360710274, - 335544450, -251658175, 402653314, 335544385, 7, 201326657, 201326722, - 16, 8, 10, 247464066, -33554302, -33554367, -310378366, -360710014, - -419430270, -536870782, -469761918, -528482174, -33554365, -37748606, - -310378431, -37748669, 155189378, -360710079, -419430335, -29359998, - -469761983, -29360063, -536870847, -528482239, 13, 14, -1463812031, - -801111999, -293601215, 67108938, 67109002, 109051997, 109052061, - 18, 17, 8388673, 12582977, 8388738, 12583042 -}; - -/* - * The following constants are used to determine the category of a - * Unicode character. 
- */ - -#define UNICODE_CATEGORY_MASK 0X1F - -enum { - UNASSIGNED, - UPPERCASE_LETTER, - LOWERCASE_LETTER, - TITLECASE_LETTER, - MODIFIER_LETTER, - OTHER_LETTER, - NON_SPACING_MARK, - ENCLOSING_MARK, - COMBINING_SPACING_MARK, - DECIMAL_DIGIT_NUMBER, - LETTER_NUMBER, - OTHER_NUMBER, - SPACE_SEPARATOR, - LINE_SEPARATOR, - PARAGRAPH_SEPARATOR, - CONTROL, - FORMAT, - PRIVATE_USE, - SURROGATE, - CONNECTOR_PUNCTUATION, - DASH_PUNCTUATION, - OPEN_PUNCTUATION, - CLOSE_PUNCTUATION, - INITIAL_QUOTE_PUNCTUATION, - FINAL_QUOTE_PUNCTUATION, - OTHER_PUNCTUATION, - MATH_SYMBOL, - CURRENCY_SYMBOL, - MODIFIER_SYMBOL, - OTHER_SYMBOL -}; - -/* - * The following macros extract the fields of the character info. The - * GetDelta() macro is complicated because we can't rely on the C compiler - * to do sign extension on right shifts. - */ - -#define GetCaseType(info) (((info) & 0xE0) >> 5) -#define GetCategory(info) ((info) & 0x1F) -#define GetDelta(info) (((info) > 0) ? ((info) >> 22) : (~(~((info)) >> 22))) - -/* - * This macro extracts the information about a character from the - * Unicode character tables. 
- */ - -#define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]]) - diff --git a/src/regex/utils.h b/src/regex/utils.h new file mode 100644 index 0000000000..1a997ac8fc --- /dev/null +++ b/src/regex/utils.h @@ -0,0 +1,22 @@ +/* utility definitions */ +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 +#endif +#define INFINITY (DUPMAX + 1) +#define NC (CHAR_MAX - CHAR_MIN + 1) +typedef unsigned char uch; + +/* switch off assertions (if not already off) if no REDEBUG */ +#ifndef REDEBUG +#ifndef NDEBUG +#define NDEBUG /* no assertions please */ +#endif +#endif +#include + +/* for old systems with bcopy() but no memmove() */ +#ifdef USEBCOPY +#define memmove(d, s, c) bcopy(s, d, c) +#endif diff --git a/src/unix/net.cpp b/src/unix/net.cpp deleted file mode 100644 index 8fff0bfdd1..0000000000 --- a/src/unix/net.cpp +++ /dev/null @@ -1,422 +0,0 @@ -// -*- c++ -*- /////////////////////////////////////////////////////////////// -// Name: unix/net.cpp -// Purpose: Network related wxWindows classes and functions -// Author: Karsten Ballüder -// Modified by: -// Created: 03.10.99 -// RCS-ID: $Id$ -// Copyright: (c) Karsten Ballüder -// Licence: wxWindows licence -///////////////////////////////////////////////////////////////////////////// - -#include "wx/setup.h" - -#if wxUSE_DIALUP_MANAGER - -#ifndef WX_PRECOMP -# include "wx/defs.h" -#endif // !PCH - -#include "wx/string.h" -#include "wx/event.h" -#include "wx/net.h" -#include "wx/timer.h" -#include "wx/filefn.h" -#include "wx/utils.h" -#include "wx/log.h" -#include "wx/file.h" - -#include -#include -#include -#include -#define __STRICT_ANSI__ -#include -#include -#include -#include -#include -#include - -// ---------------------------------------------------------------------------- -// A class which groups functions dealing with connecting to the network from a -// workstation using dial-up access 
to the net. There is at most one instance -// of this class in the program accessed via GetDialUpManager(). -// ---------------------------------------------------------------------------- - -/* TODO - * - * 1. more configurability for Unix: i.e. how to initiate the connection, how - * to check for online status, &c. - * 2. add a "long Dial(long connectionId = -1)" function which asks the user - * about which connection to dial (this may be done using native dialogs - * under NT, need generic dialogs for all others) and returns the identifier - * of the selected connection (it's opaque to the application) - it may be - * reused later to dial the same connection later (or use strings instead of - * longs may be?) - * 3. add an async version of dialing functions which notify the caller about - * the progress (or may be even start another thread to monitor it) - * 4. the static creation/accessor functions are not MT-safe - but is this - * really crucial? I think we may suppose they're always called from the - * main thread? - */ - -class WXDLLEXPORT wxDialUpManagerImpl : public wxDialUpManager -{ -public: - wxDialUpManagerImpl() - { - m_IsOnline = -1; // unknown - m_timer = NULL; - m_CanUseIfconfig = -1; // unknown - m_BeaconHost = WXDIALUP_MANAGER_DEFAULT_BEACONHOST; - m_BeaconPort = 80; - } - - /** Could the dialup manager be initialized correctly? If this function - returns FALSE, no other functions will work neither, so it's a good idea - to call this function and check its result before calling any other - wxDialUpManager methods. - */ - virtual bool IsOk() const - { return TRUE; } - - /** The simplest way to initiate a dial up: this function dials the given - ISP (exact meaning of the parameter depends on the platform), returns - TRUE on success or FALSE on failure and logs the appropriate error - message in the latter case. 
- @param nameOfISP optional paramater for dial program - @param username unused - @param password unused - */ - virtual bool Dial(const wxString& nameOfISP, - const wxString& WXUNUSED(username), - const wxString& WXUNUSED(password)); - - /// Hang up the currently active dial up connection. - virtual bool HangUp(); - - // returns TRUE if the computer is connected to the network: under Windows, - // this just means that a RAS connection exists, under Unix we check that - // the "well-known host" (as specified by SetWellKnownHost) is reachable - virtual bool IsOnline() const - { - if( (! m_timer) // we are not polling, so test now: - || m_IsOnline == -1 - ) - CheckStatus(); - return m_IsOnline != 0; - } - - // sometimes the built-in logic for determining the online status may fail, - // so, in general, the user should be allowed to override it. This function - // allows to forcefully set the online status - whatever our internal - // algorithm may think about it. - virtual void SetOnlineStatus(bool isOnline = TRUE) - { m_IsOnline = isOnline; } - - // set misc wxDialUpManager options - // -------------------------------- - - // enable automatical checks for the connection status and sending of - // wxEVT_DIALUP_CONNECTED/wxEVT_DIALUP_DISCONNECTED events. The interval - // parameter is only for Unix where we do the check manually: under - // Windows, the notification about the change of connection status is - // instantenous. - // - // Returns FALSE if couldn't set up automatic check for online status. - virtual bool EnableAutoCheckOnlineStatus(size_t nSeconds); - - // disable automatic check for connection status change - notice that the - // wxEVT_DIALUP_XXX events won't be sent any more neither. - virtual void DisableAutoCheckOnlineStatus(); - - // under Unix, the value of well-known host is used to check whether we're - // connected to the internet. It's unused under Windows, but this function - // is always safe to call. The default value is www.yahoo.com. 
- virtual void SetWellKnownHost(const wxString& hostname, - int portno = 80); - /** Sets the commands to start up the network and to hang up - again. Used by the Unix implementations only. - */ - virtual void SetConnectCommand(const wxString &command, const wxString &hupcmd) - { m_ConnectCommand = command; m_HangUpCommand = hupcmd; } - -private: - /// -1: don´t know, 0 = no, 1 = yes - int m_IsOnline; - - /// Can we use ifconfig to list active devices? - int m_CanUseIfconfig; - /// The path to ifconfig - wxString m_IfconfigPath; - - /// beacon host: - wxString m_BeaconHost; - /// beacon host portnumber for connect: - int m_BeaconPort; - - /// command to connect to network - wxString m_ConnectCommand; - /// command to hang up - wxString m_HangUpCommand; - /// name of ISP - wxString m_ISPname; - /// a timer for regular testing - class AutoCheckTimer *m_timer; - - friend class AutoCheckTimer; - /// determine status - void CheckStatus(void) const; - - /// real status check - void CheckStatusInternal(void); -}; - - -class AutoCheckTimer : public wxTimer -{ -public: - AutoCheckTimer(wxDialUpManagerImpl *dupman) - { - m_dupman = dupman; - m_started = FALSE; - } - - virtual bool Start( int millisecs = -1 ) - { m_started = TRUE; return wxTimer::Start(millisecs, FALSE); } - - virtual void Notify() - { wxLogTrace("Checking dial up network status."); m_dupman->CheckStatus(); } - - virtual void Stop() - { if ( m_started ) wxTimer::Stop(); } -public: - bool m_started; - wxDialUpManagerImpl *m_dupman; -}; - -bool -wxDialUpManagerImpl::Dial(const wxString &isp, - const wxString & WXUNUSED(username), - const wxString & WXUNUSED(password)) -{ - if(m_IsOnline == 1) - return FALSE; - m_IsOnline = -1; - m_ISPname = isp; - wxString cmd; - if(m_ConnectCommand.Find("%s")) - cmd.Printf(m_ConnectCommand,m_ISPname.c_str()); - else - cmd = m_ConnectCommand; - return wxExecute(cmd, /* sync */ TRUE) == 0; -} - -bool -wxDialUpManagerImpl::HangUp(void) -{ - if(m_IsOnline == 0) - return FALSE; - 
m_IsOnline = -1; - wxString cmd; - if(m_HangUpCommand.Find("%s")) - cmd.Printf(m_HangUpCommand,m_ISPname.c_str()); - else - cmd = m_HangUpCommand; - return wxExecute(cmd, /* sync */ TRUE) == 0; -} - - -bool -wxDialUpManagerImpl::EnableAutoCheckOnlineStatus(size_t nSeconds) -{ - wxASSERT(m_timer == NULL); - m_timer = new AutoCheckTimer(this); - bool rc = m_timer->Start(nSeconds*1000); - if(! rc) - { - delete m_timer; - m_timer = NULL; - } - return rc; -} - -void -wxDialUpManagerImpl::DisableAutoCheckOnlineStatus() -{ - wxASSERT(m_timer != NULL); - m_timer->Stop(); - delete m_timer; - m_timer = NULL; -} - - -void -wxDialUpManagerImpl::SetWellKnownHost(const wxString& hostname, int portno) -{ - /// does hostname contain a port number? - wxString port = hostname.After(':'); - if(port.Length()) - { - m_BeaconHost = hostname.Before(':'); - m_BeaconPort = atoi(port); - } - else - { - m_BeaconHost = hostname; - m_BeaconPort = portno; - } -} - - -void -wxDialUpManagerImpl::CheckStatus(void) const -{ - // This function calls the CheckStatusInternal() helper function - // which is OS - specific and then sends the events. - - int oldIsOnline = m_IsOnline; - ( /* non-const */ (wxDialUpManagerImpl *)this)->CheckStatusInternal(); - - // now send the events as appropriate: - if(m_IsOnline != oldIsOnline) - { - if(m_IsOnline) - ; // send ev - else - ; // send ev - } -} - -/* - We have three methods that we can use: - - 1. test via /sbin/ifconfig and grep for "sl", "ppp", "pl" - --> should be fast enough for regular polling - 2. test if we can reach the well known beacon host - --> too slow for polling - 3. check /proc/net/dev on linux?? - This method should be preferred, if possible. Need to do more - testing. - -*/ - -void -wxDialUpManagerImpl::CheckStatusInternal(void) -{ - m_IsOnline = -1; - - // First time check for ifconfig location. We only use the variant - // which does not take arguments, a la GNU. 
- if(m_CanUseIfconfig == -1) // unknown - { - if(wxFileExists("/sbin/ifconfig")) - m_IfconfigPath = "/sbin/ifconfig"; - else if(wxFileExists("/usr/sbin/ifconfig")) - m_IfconfigPath = "/usr/sbin/ifconfig"; - } - - wxLogNull ln; // suppress all error messages - // Let´s try the ifconfig method first, should be fastest: - if(m_CanUseIfconfig != 0) // unknown or yes - { - wxASSERT(m_IfconfigPath.length()); - - wxString tmpfile = wxGetTempFileName("_wxdialuptest"); - wxString cmd = "/bin/sh -c \'"; - cmd << m_IfconfigPath << " >" << tmpfile << '\''; - /* I tried to add an option to wxExecute() to not close stdout, - so we could let ifconfig write directly to the tmpfile, but - this does not work. That should be faster, as it doesn´t call - the shell first. I have no idea why. :-( (KB) */ -#if 0 - // temporarily redirect stdout/stderr: - int - new_stdout = dup(STDOUT_FILENO), - new_stderr = dup(STDERR_FILENO); - close(STDOUT_FILENO); - close(STDERR_FILENO); - - int - // new stdout: - output_fd = open(tmpfile, O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR), - // new stderr: - null_fd = open("/dev/null", O_CREAT, S_IRUSR|S_IWUSR); - // verify well behaved unix behaviour: - wxASSERT(output_fd == STDOUT_FILENO); - wxASSERT(null_fd == STDERR_FILENO); - int rc = wxExecute(m_IfconfigPath,TRUE /* sync */,NULL ,wxEXECUTE_DONT_CLOSE_FDS); - close(null_fd); close(output_fd); - // restore old stdout, stderr: - int test; - test = dup(new_stdout); close(new_stdout); wxASSERT(test == STDOUT_FILENO); - test = dup(new_stderr); close(new_stderr); wxASSERT(test == STDERR_FILENO); - if(rc == 0) -#endif - if(wxExecute(cmd,TRUE /* sync */) == 0) - { - m_CanUseIfconfig = 1; - wxFile file; - if( file.Open(tmpfile) ) - { - char *output = new char [file.Length()+1]; - output[file.Length()] = '\0'; - if(file.Read(output,file.Length()) == file.Length()) - { - if(strstr(output,"ppp") // ppp - || strstr(output,"sl") // slip - || strstr(output,"pl") // plip - ) - m_IsOnline = 1; - else - m_IsOnline = 0; - } - 
file.Close(); - delete [] output; - } - // else m_IsOnline remains -1 as we don't know for sure - } - else // could not run ifconfig correctly - m_CanUseIfconfig = 0; // don´t try again - (void) wxRemoveFile(tmpfile); - if(m_IsOnline != -1) // we are done - return; - } - - // second method: try to connect to well known host: - // This can be used under Win 9x, too! - struct hostent *hp; - struct sockaddr_in serv_addr; - int sockfd; - - m_IsOnline = 0; // assume false - if((hp = gethostbyname(m_BeaconHost)) == NULL) - return; // no DNS no net - - serv_addr.sin_family = hp->h_addrtype; - memcpy(&serv_addr.sin_addr,hp->h_addr, hp->h_length); - serv_addr.sin_port = htons(m_BeaconPort); - if( ( sockfd = socket(hp->h_addrtype, SOCK_STREAM, 0)) < 0) - { - // sys_error("cannot create socket for gw"); - return; - } - if( connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) - { - //sys_error("cannot connect to server"); - return; - } - //connected! - close(sockfd); -} - - -/* static */ -wxDialUpManager * -wxDialUpManager::wxDialUpManager::Create(void) -{ - return new wxDialUpManagerImpl; -} - -#endif // wxUSE_DIALUP_MANAGER diff --git a/utils/wxprop/src/prop.cpp b/utils/wxprop/src/prop.cpp deleted file mode 100644 index 59802962de..0000000000 --- a/utils/wxprop/src/prop.cpp +++ /dev/null @@ -1,1119 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Name: prop.cpp -// Purpose: Propert sheet classes implementation -// Author: Julian Smart -// Modified by: -// Created: 04/01/98 -// RCS-ID: $Id$ -// Copyright: (c) Julian Smart -// Licence: wxWindows license -///////////////////////////////////////////////////////////////////////////// - -#ifdef __GNUG__ -#pragma implementation "prop.h" -#endif - -// For compilers that support precompilation, includes "wx/wx.h". 
-#include "wx/wxprec.h" - -#ifdef __BORLANDC__ -#pragma hdrstop -#endif - -#ifndef WX_PRECOMP -#include "wx/wx.h" -#endif - -#include -#include -#include -#include - -#if wxUSE_IOSTREAMH -#if defined(__WXMSW__) && !defined(__GNUWIN32__) -#include -#else -#include -#endif -#else -#include -#endif - -#include "wx/window.h" -#include "wx/utils.h" -#include "wx/list.h" -#include "prop.h" - -IMPLEMENT_DYNAMIC_CLASS(wxPropertyValue, wxObject) - -wxPropertyValue::wxPropertyValue(void) -{ - m_type = wxPropertyValueNull; - m_next = NULL; - m_last = NULL; - m_value.first = NULL; - m_clientData = NULL; - m_modifiedFlag = FALSE; -} - -wxPropertyValue::wxPropertyValue(const wxPropertyValue& copyFrom) -{ - m_modifiedFlag = FALSE; - Copy((wxPropertyValue& )copyFrom); -} - -wxPropertyValue::wxPropertyValue(const char *val) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueString; - - m_value.string = copystring(val); - m_clientData = NULL; - m_next = NULL; - m_last = NULL; -} - -wxPropertyValue::wxPropertyValue(const wxString& val) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueString; - - m_value.string = copystring((const char *)val); - m_clientData = NULL; - m_next = NULL; - m_last = NULL; -} - -wxPropertyValue::wxPropertyValue(long the_integer) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueInteger; - m_value.integer = the_integer; - m_clientData = NULL; - m_next = NULL; -} - -wxPropertyValue::wxPropertyValue(bool val) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValuebool; - m_value.integer = val; - m_clientData = NULL; - m_next = NULL; -} - -wxPropertyValue::wxPropertyValue(float the_real) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueReal; - m_value.real = the_real; - m_clientData = NULL; - m_next = NULL; -} - -wxPropertyValue::wxPropertyValue(double the_real) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueReal; - m_value.real = (float)the_real; - m_clientData = NULL; - m_next = NULL; -} - -// Pointer versions: we have a pointer to 
the real C++ value. -wxPropertyValue::wxPropertyValue(char **val) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueStringPtr; - - m_value.stringPtr = val; - m_clientData = NULL; - m_next = NULL; - m_last = NULL; -} - -wxPropertyValue::wxPropertyValue(long *val) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueIntegerPtr; - m_value.integerPtr = val; - m_clientData = NULL; - m_next = NULL; -} - -wxPropertyValue::wxPropertyValue(bool *val) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueboolPtr; - m_value.boolPtr = val; - m_clientData = NULL; - m_next = NULL; -} - -wxPropertyValue::wxPropertyValue(float *val) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueRealPtr; - m_value.realPtr = val; - m_clientData = NULL; - m_next = NULL; -} - -wxPropertyValue::wxPropertyValue(wxList *the_list) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueList; - m_clientData = NULL; - m_last = NULL; - m_value.first = NULL; - - wxNode *node = the_list->First(); - while (node) - { - wxPropertyValue *expr = (wxPropertyValue *)node->Data(); - Append(expr); - node = node->Next(); - } - - delete the_list; -} - -wxPropertyValue::wxPropertyValue(wxStringList *the_list) -{ - m_modifiedFlag = FALSE; - m_type = wxPropertyValueList; - m_clientData = NULL; - m_last = NULL; - m_value.first = NULL; - - wxNode *node = the_list->First(); - while (node) - { - char *s = (char *)node->Data(); - Append(new wxPropertyValue(s)); - node = node->Next(); - } - delete the_list; -} - -wxPropertyValue::~wxPropertyValue(void) -{ - switch (m_type) - { - case wxPropertyValueInteger: - case wxPropertyValuebool: - case wxPropertyValueReal: - { - break; - } - case wxPropertyValueString: - { - delete[] m_value.string; - break; - } - case wxPropertyValueList: - { - wxPropertyValue *expr = m_value.first; - while (expr) - { - wxPropertyValue *expr1 = expr->m_next; - - delete expr; - expr = expr1; - } - break; - } - default: - case wxPropertyValueNull: break; - } -} - -void 
wxPropertyValue::Append(wxPropertyValue *expr) -{ - m_modifiedFlag = TRUE; - if (!m_value.first) - m_value.first = expr; - - if (m_last) - m_last->m_next = expr; - m_last = expr; -} - -void wxPropertyValue::Insert(wxPropertyValue *expr) -{ - m_modifiedFlag = TRUE; - expr->m_next = m_value.first; - m_value.first = expr; - - if (!m_last) - m_last = expr; -} - -// Delete from list -void wxPropertyValue::Delete(wxPropertyValue *node) -{ - wxPropertyValue *expr = GetFirst(); - - wxPropertyValue *previous = NULL; - while (expr && (expr != node)) - { - previous = expr; - expr = expr->GetNext(); - } - - if (expr) - { - if (previous) - previous->m_next = expr->m_next; - - // If node was the first in the list, - // make the list point to the NEXT one. - if (GetFirst() == expr) - { - m_value.first = expr->m_next; - } - - // If node was the last in the list, - // make the list 'last' pointer point to the PREVIOUS one. - if (GetLast() == expr) - { - if (previous) - m_last = previous; - else - m_last = NULL; - } - m_modifiedFlag = TRUE; - delete expr; - } - -} - -void wxPropertyValue::ClearList(void) -{ - wxPropertyValue *val = GetFirst(); - if (val) - m_modifiedFlag = TRUE; - - while (val) - { - wxPropertyValue *next = val->GetNext(); - delete val; - val = next; - } - m_value.first = NULL; - m_last = NULL; -} - -wxPropertyValue *wxPropertyValue::NewCopy(void) const -{ - switch (m_type) - { - case wxPropertyValueInteger: - return new wxPropertyValue(m_value.integer); - case wxPropertyValuebool: - return new wxPropertyValue((bool) (m_value.integer != 0)); - case wxPropertyValueReal: - return new wxPropertyValue(m_value.real); - case wxPropertyValueString: - return new wxPropertyValue(m_value.string); - case wxPropertyValueList: - { - wxPropertyValue *expr = m_value.first; - wxPropertyValue *new_list = new wxPropertyValue; - new_list->SetType(wxPropertyValueList); - while (expr) - { - wxPropertyValue *expr2 = expr->NewCopy(); - new_list->Append(expr2); - expr = expr->m_next; - } - 
return new_list; - } - case wxPropertyValueIntegerPtr: - return new wxPropertyValue(m_value.integerPtr); - case wxPropertyValueRealPtr: - return new wxPropertyValue(m_value.realPtr); - case wxPropertyValueboolPtr: - return new wxPropertyValue(m_value.boolPtr); - case wxPropertyValueStringPtr: - return new wxPropertyValue(m_value.stringPtr); - - case wxPropertyValueNull: -#ifdef __X__ - cerr << "Should never get here!\n"; -#endif - break; - } - return NULL; -} - -void wxPropertyValue::Copy(wxPropertyValue& copyFrom) -{ - m_type = copyFrom.Type(); - - switch (m_type) - { - case wxPropertyValueInteger: - (*this) = copyFrom.IntegerValue(); - return ; - - case wxPropertyValueReal: - (*this) = copyFrom.RealValue(); - return ; - - case wxPropertyValueString: - (*this) = wxString(copyFrom.StringValue()); - return ; - - case wxPropertyValuebool: - (*this) = copyFrom.BoolValue(); - return ; - - // Pointers - case wxPropertyValueboolPtr: - (*this) = copyFrom.BoolValuePtr(); - return ; - case wxPropertyValueRealPtr: - (*this) = copyFrom.RealValuePtr(); - return ; - case wxPropertyValueIntegerPtr: - (*this) = copyFrom.IntegerValuePtr(); - return ; - case wxPropertyValueStringPtr: - { - char** s = copyFrom.StringValuePtr(); - (*this) = s != 0; - return ; - } - - case wxPropertyValueList: - { - m_value.first = NULL; - m_next = NULL; - m_last = NULL; - wxPropertyValue *expr = copyFrom.m_value.first; - while (expr) - { - wxPropertyValue *expr2 = expr->NewCopy(); - Append(expr2); - expr = expr->m_next; - } - return; - } - case wxPropertyValueNull: -#ifdef __X__ - cerr << "Should never get here!\n"; -#endif - break; - } -} - -// Return nth argument of a clause (starting from 1) -wxPropertyValue *wxPropertyValue::Arg(wxPropertyValueType type, int arg) const -{ - wxPropertyValue *expr = m_value.first; - for (int i = 1; i < arg; i++) - if (expr) - expr = expr->m_next; - - if (expr && (expr->m_type == type)) - return expr; - else - return NULL; -} - -// Return nth argument of a list 
expression (starting from zero) -wxPropertyValue *wxPropertyValue::Nth(int arg) const -{ - if (m_type != wxPropertyValueList) - return NULL; - - wxPropertyValue *expr = m_value.first; - for (int i = 0; i < arg; i++) - if (expr) - expr = expr->m_next; - else return NULL; - - if (expr) - return expr; - else - return NULL; -} - - // Returns the number of elements in a list expression -int wxPropertyValue::Number(void) const -{ - if (m_type != wxPropertyValueList) - return 0; - - int i = 0; - wxPropertyValue *expr = m_value.first; - while (expr) - { - expr = expr->m_next; - i ++; - } - return i; -} - -void wxPropertyValue::WritePropertyClause(ostream& stream) // Write this expression as a top-level clause -{ - if (m_type != wxPropertyValueList) - return; - - wxPropertyValue *node = m_value.first; - if (node) - { - node->WritePropertyType(stream); - stream << "("; - node = node->m_next; - bool first = TRUE; - while (node) - { - if (!first) - stream << " "; - node->WritePropertyType(stream); - node = node->m_next; - if (node) stream << ",\n"; - first = FALSE; - } - stream << ").\n\n"; - } -} - -void wxPropertyValue::WritePropertyType(ostream& stream) // Write as any other subexpression -{ - switch (m_type) - { - case wxPropertyValueInteger: - { - stream << m_value.integer; - break; - } - case wxPropertyValueIntegerPtr: - { - stream << *m_value.integerPtr; - break; - } - case wxPropertyValuebool: - { - if (m_value.integer) - stream << "True"; - else - stream << "False"; - break; - } - case wxPropertyValueboolPtr: - { - if (*m_value.integerPtr) - stream << "True"; - else - stream << "False"; - break; - } - case wxPropertyValueReal: - { - float f = m_value.real; - sprintf(wxBuffer, "%.6g", (double)f); - stream << wxBuffer; - break; - } - case wxPropertyValueRealPtr: - { - float f = *m_value.realPtr; -/* Now the parser can cope with this. - // Prevent printing in 'e' notation. Any better way? 
- if (fabs(f) < 0.00001) - f = 0.0; -*/ - sprintf(wxBuffer, "%.6g", f); - stream << wxBuffer; - break; - } - case wxPropertyValueString: - { -// stream << "\""; - int i; - int len = strlen(m_value.string); - for (i = 0; i < len; i++) - { - char ch = m_value.string[i]; -// if (ch == '"' || ch == '\\') -// stream << "\\"; - stream << ch; - } - -// stream << "\""; - break; - } - case wxPropertyValueStringPtr: - { - int i; - int len = strlen(*(m_value.stringPtr)); - for (i = 0; i < len; i++) - { - char ch = *(m_value.stringPtr)[i]; - - } - break; - } - case wxPropertyValueList: - { - if (!m_value.first) - stream << "[]"; - else - { - wxPropertyValue *expr = m_value.first; - - stream << "["; - while (expr) - { - expr->WritePropertyType(stream); - expr = expr->m_next; - if (expr) stream << ", "; - } - stream << "]"; - } - break; - } - case wxPropertyValueNull: break; - } -} - -wxString wxPropertyValue::GetStringRepresentation(void) -{ - char buf[500]; - buf[0] = 0; - - ostrstream str((char *)buf, (int)500, ios::out); - WritePropertyType(str); - str << '\0'; - str.flush(); - - wxString theString(buf); - return theString; -} - -void wxPropertyValue::operator=(const wxPropertyValue& val) -{ - m_modifiedFlag = TRUE; - Copy((wxPropertyValue&)val); -} - -// void wxPropertyValue::operator=(const char *val) -void wxPropertyValue::operator=(const wxString& val1) -{ - const char *val = (const char *)val1; - - m_modifiedFlag = TRUE; - if (m_type == wxPropertyValueNull) - m_type = wxPropertyValueString; - - if (m_type == wxPropertyValueString) - { - if (val) - m_value.string = copystring(val); - else - m_value.string = NULL; - } - else if (m_type == wxPropertyValueStringPtr) - { - if (*m_value.stringPtr) - delete[] *m_value.stringPtr; - if (val) - *m_value.stringPtr = copystring(val); - else - *m_value.stringPtr = NULL; - } - - m_clientData = NULL; - m_next = NULL; - m_last = NULL; - -} - -void wxPropertyValue::operator=(const long val) -{ - m_modifiedFlag = TRUE; - if (m_type == 
wxPropertyValueNull) - m_type = wxPropertyValueInteger; - - if (m_type == wxPropertyValueInteger) - m_value.integer = val; - else if (m_type == wxPropertyValueIntegerPtr) - *m_value.integerPtr = val; - else if (m_type == wxPropertyValueReal) - m_value.real = (float)val; - else if (m_type == wxPropertyValueRealPtr) - *m_value.realPtr = (float)val; - - m_clientData = NULL; - m_next = NULL; -} - -void wxPropertyValue::operator=(const bool val) -{ - m_modifiedFlag = TRUE; - if (m_type == wxPropertyValueNull) - m_type = wxPropertyValuebool; - - if (m_type == wxPropertyValuebool) - m_value.integer = (long)val; - else if (m_type == wxPropertyValueboolPtr) - *m_value.boolPtr = val; - - m_clientData = NULL; - m_next = NULL; -} - -void wxPropertyValue::operator=(const float val) -{ - m_modifiedFlag = TRUE; - if (m_type == wxPropertyValueNull) - m_type = wxPropertyValueReal; - - if (m_type == wxPropertyValueInteger) - m_value.integer = (long)val; - else if (m_type == wxPropertyValueIntegerPtr) - *m_value.integerPtr = (long)val; - else if (m_type == wxPropertyValueReal) - m_value.real = val; - else if (m_type == wxPropertyValueRealPtr) - *m_value.realPtr = val; - - m_clientData = NULL; - m_next = NULL; -} - -void wxPropertyValue::operator=(const char **val) -{ - m_modifiedFlag = TRUE; - m_type = wxPropertyValueStringPtr; - - if (val) - m_value.stringPtr = (char **)val; - else - m_value.stringPtr = NULL; - m_clientData = NULL; - m_next = NULL; - m_last = NULL; - -} - -void wxPropertyValue::operator=(const long *val) -{ - m_modifiedFlag = TRUE; - m_type = wxPropertyValueIntegerPtr; - m_value.integerPtr = (long *)val; - m_clientData = NULL; - m_next = NULL; -} - -void wxPropertyValue::operator=(const bool *val) -{ - m_modifiedFlag = TRUE; - m_type = wxPropertyValueboolPtr; - m_value.boolPtr = (bool *)val; - m_clientData = NULL; - m_next = NULL; -} - -void wxPropertyValue::operator=(const float *val) -{ - m_modifiedFlag = TRUE; - m_type = wxPropertyValueRealPtr; - m_value.realPtr 
= (float *)val; - m_clientData = NULL; - m_next = NULL; -} - -long wxPropertyValue::IntegerValue(void) const - { - if (m_type == wxPropertyValueInteger) - return m_value.integer; - else if (m_type == wxPropertyValueReal) - return (long)m_value.real; - else if (m_type == wxPropertyValueIntegerPtr) - return *m_value.integerPtr; - else if (m_type == wxPropertyValueRealPtr) - return (long)(*m_value.realPtr); - else return 0; - } - -long *wxPropertyValue::IntegerValuePtr(void) const -{ - return m_value.integerPtr; -} - -float wxPropertyValue::RealValue(void) const { - if (m_type == wxPropertyValueReal) - return m_value.real; - else if (m_type == wxPropertyValueRealPtr) - return *m_value.realPtr; - else if (m_type == wxPropertyValueInteger) - return (float)m_value.integer; - else if (m_type == wxPropertyValueIntegerPtr) - return (float)*(m_value.integerPtr); - else return 0.0; - } - -float *wxPropertyValue::RealValuePtr(void) const -{ - return m_value.realPtr; -} - -bool wxPropertyValue::BoolValue(void) const { - if (m_type == wxPropertyValueReal) - return (m_value.real != 0.0); - if (m_type == wxPropertyValueRealPtr) - return (*(m_value.realPtr) != 0.0); - else if (m_type == wxPropertyValueInteger) - return (m_value.integer != 0); - else if (m_type == wxPropertyValueIntegerPtr) - return (*(m_value.integerPtr) != 0); - else if (m_type == wxPropertyValuebool) - return (m_value.integer != 0); - else if (m_type == wxPropertyValueboolPtr) - return (*(m_value.boolPtr) != 0); - else return FALSE; - } - -bool *wxPropertyValue::BoolValuePtr(void) const -{ - return m_value.boolPtr; -} - -char *wxPropertyValue::StringValue(void) const { - if (m_type == wxPropertyValueString) - return m_value.string; - else if (m_type == wxPropertyValueStringPtr) - return *(m_value.stringPtr); - else return NULL; - } - -char **wxPropertyValue::StringValuePtr(void) const -{ - return m_value.stringPtr; -} - -/* - * A property (name plus value) - */ - -IMPLEMENT_DYNAMIC_CLASS(wxProperty, wxObject) - 
-wxProperty::wxProperty(void) -{ - m_propertyRole = (char *)NULL; - m_propertyValidator = NULL; - m_propertyWindow = NULL; - m_enabled = TRUE; -} - -wxProperty::wxProperty(wxProperty& copyFrom) -{ - m_value = copyFrom.GetValue(); - m_name = copyFrom.GetName(); - m_propertyRole = copyFrom.GetRole(); - m_propertyValidator = copyFrom.GetValidator(); - m_enabled = copyFrom.IsEnabled(); - m_propertyWindow = NULL; -} - -wxProperty::wxProperty(wxString nm, wxString role, wxPropertyValidator *ed):m_name(nm), m_propertyRole(role) -{ - m_propertyValidator = ed; - m_propertyWindow = NULL; - m_enabled = TRUE; -} - -wxProperty::wxProperty(wxString nm, const wxPropertyValue& val, wxString role, wxPropertyValidator *ed): - m_name(nm), m_value(val), m_propertyRole(role) -{ - m_propertyValidator = ed; - m_propertyWindow = NULL; - m_enabled = TRUE; -} - -wxProperty::~wxProperty(void) -{ - if (m_propertyValidator) - delete m_propertyValidator; -} - -wxPropertyValue& wxProperty::GetValue(void) const -{ - return (wxPropertyValue&) m_value; -} - -wxPropertyValidator *wxProperty::GetValidator(void) const -{ - return m_propertyValidator; -} - -wxString& wxProperty::GetName(void) const -{ - return (wxString&) m_name; -} - -wxString& wxProperty::GetRole(void) const -{ - return (wxString&) m_propertyRole; -} - -void wxProperty::SetValue(const wxPropertyValue& val) -{ - m_value = val; -} - -void wxProperty::SetValidator(wxPropertyValidator *ed) -{ - m_propertyValidator = ed; -} - -void wxProperty::SetRole(wxString& role) -{ - m_propertyRole = role; -} - -void wxProperty::SetName(wxString& nm) -{ - m_name = nm; -} - -void wxProperty::operator=(const wxPropertyValue& val) -{ - m_value = val; -} - -/* - * Base property view class - */ - -IMPLEMENT_DYNAMIC_CLASS(wxPropertyView, wxEvtHandler) - -wxPropertyView::wxPropertyView(long flags) -{ - m_buttonFlags = flags; - m_propertySheet = NULL; - m_currentValidator = NULL; - m_currentProperty = NULL; -} - -wxPropertyView::~wxPropertyView(void) -{ -} - 
-void wxPropertyView::AddRegistry(wxPropertyValidatorRegistry *registry) -{ - m_validatorRegistryList.Append(registry); -} - -wxPropertyValidator *wxPropertyView::FindPropertyValidator(wxProperty *property) -{ - if (property->GetValidator()) - return property->GetValidator(); - - wxNode *node = m_validatorRegistryList.First(); - while (node) - { - wxPropertyValidatorRegistry *registry = (wxPropertyValidatorRegistry *)node->Data(); - wxPropertyValidator *validator = registry->GetValidator(property->GetRole()); - if (validator) - return validator; - node = node->Next(); - } - return NULL; -/* - if (!wxDefaultPropertyValidator) - wxDefaultPropertyValidator = new wxPropertyListValidator; - return wxDefaultPropertyValidator; -*/ -} - -/* - * Property sheet - */ - -IMPLEMENT_DYNAMIC_CLASS(wxPropertySheet, wxObject) - -wxPropertySheet::wxPropertySheet(void):m_properties(wxKEY_STRING) -{ -} - -wxPropertySheet::~wxPropertySheet(void) -{ - Clear(); -} - -bool wxPropertySheet::Save( ostream& WXUNUSED(str) ) -{ - return FALSE; -} - -bool wxPropertySheet::Load( ostream& WXUNUSED(str) ) -{ - return FALSE; -} - -void wxPropertySheet::UpdateAllViews( wxPropertyView *WXUNUSED(thisView) ) -{ -} - -// Add a property -void wxPropertySheet::AddProperty(wxProperty *property) -{ - m_properties.Append((const char*) property->GetName(), property); -} - -// Get property by name -wxProperty *wxPropertySheet::GetProperty(wxString name) -{ - wxNode *node = m_properties.Find((const char*) name); - if (!node) - return NULL; - else - return (wxProperty *)node->Data(); -} - -// Clear all properties -void wxPropertySheet::Clear(void) -{ - wxNode *node = m_properties.First(); - while (node) - { - wxProperty *prop = (wxProperty *)node->Data(); - wxNode *next = node->Next(); - delete prop; - delete node; - node = next; - } -} - -// Sets/clears the modified flag for each property value -void wxPropertySheet::SetAllModified(bool flag) -{ - wxNode *node = m_properties.First(); - while (node) - { - 
wxProperty *prop = (wxProperty *)node->Data(); - prop->GetValue().SetModified(flag); - node = node->Next(); - } -} - -/* - * Property validator registry - * - */ - -IMPLEMENT_DYNAMIC_CLASS(wxPropertyValidatorRegistry, wxHashTable) - -wxPropertyValidatorRegistry::wxPropertyValidatorRegistry(void):wxHashTable(wxKEY_STRING) -{ -} - -wxPropertyValidatorRegistry::~wxPropertyValidatorRegistry(void) -{ - ClearRegistry(); -} - -void wxPropertyValidatorRegistry::RegisterValidator(const wxString& typeName, wxPropertyValidator *validator) -{ - Put((const char*) typeName, validator); -} - -wxPropertyValidator *wxPropertyValidatorRegistry::GetValidator(const wxString& typeName) -{ - return (wxPropertyValidator *)Get((const char*) typeName); -} - -void wxPropertyValidatorRegistry::ClearRegistry(void) -{ - BeginFind(); - wxNode *node; - while (node = Next()) - { - delete (wxPropertyValidator *)node->Data(); - } -} - - /* - * Property validator - */ - - -IMPLEMENT_ABSTRACT_CLASS(wxPropertyValidator, wxEvtHandler) - -wxPropertyValidator::wxPropertyValidator(long flags) -{ - m_validatorFlags = flags; - m_validatorProperty = NULL; -} - -wxPropertyValidator::~wxPropertyValidator(void) -{} - -bool wxPropertyValidator::StringToFloat (char *s, float *number) { - double num; - bool ok = StringToDouble (s, &num); - *number = (float) num; - return ok; -} - -bool wxPropertyValidator::StringToDouble (char *s, double *number) { - bool ok = TRUE; - char *value_ptr; - *number = strtod (s, &value_ptr); - if (value_ptr) { - int len = strlen (value_ptr); - for (int i = 0; i < len; i++) { - ok = (isspace (value_ptr[i]) != 0); - if (!ok) return FALSE; - } - } - return ok; -} - -bool wxPropertyValidator::StringToInt (char *s, int *number) { - long num; - bool ok = StringToLong (s, &num); - *number = (int) num; - return ok; -} - -bool wxPropertyValidator::StringToLong (char *s, long *number) { - bool ok = TRUE; - char *value_ptr; - *number = strtol (s, &value_ptr, 10); - if (value_ptr) { - int len = 
strlen (value_ptr); - for (int i = 0; i < len; i++) { - ok = (isspace (value_ptr[i]) != 0); - if (!ok) return FALSE; - } - } - return ok; -} - -char *wxPropertyValidator::FloatToString (float number) { - static char buf[20]; - sprintf (buf, "%.6g", number); - return buf; -} - -char *wxPropertyValidator::DoubleToString (double number) { - static char buf[20]; - sprintf (buf, "%.6g", number); - return buf; -} - -char *wxPropertyValidator::IntToString (int number) { - return ::IntToString (number); -} - -char *wxPropertyValidator::LongToString (long number) { - return ::LongToString (number); - } - -