1. changed wxStringTokenizer to not modify the string we're iterating over
   but to just update our position in it (makes the code much more clear)
2. added GetLastDelimiter() to make up for lack of mode combining
   wxTOKEN_RET_EMPTY_ALL and RET_DELIMS
3. documented it and added unit tests for it


git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@36552 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2005-12-24 00:12:54 +00:00
parent 505a8c2ced
commit 4626c57c58
5 changed files with 112 additions and 62 deletions

View File

@@ -14,6 +14,7 @@ INCOMPATIBLE CHANGES SINCE 2.6.x
All: All:
- wxLaunchDefaultBrowser() now supports wxBROWSER_NEW_WINDOW flag - wxLaunchDefaultBrowser() now supports wxBROWSER_NEW_WINDOW flag
- Added wxStringTokenizer::GetLastDelimiter(); improved documentation
- wxGetWorkingDirectory() deprecated. Use wxGetCwd() instead. - wxGetWorkingDirectory() deprecated. Use wxGetCwd() instead.
All (GUI): All (GUI):

View File

@@ -43,17 +43,23 @@ same as {\tt wxTOKEN\_STRTOK} if the delimiter string contains only
whitespaces, same as {\tt wxTOKEN\_RET\_EMPTY} otherwise} whitespaces, same as {\tt wxTOKEN\_RET\_EMPTY} otherwise}
\twocolitem{{\tt wxTOKEN\_RET\_EMPTY}}{In this mode, the empty tokens in the \twocolitem{{\tt wxTOKEN\_RET\_EMPTY}}{In this mode, the empty tokens in the
middle of the string will be returned, i.e. {\tt "a::b:"} will be tokenized in middle of the string will be returned, i.e. {\tt "a::b:"} will be tokenized in
three tokens `a', `' and `b'.} three tokens `a', `' and `b'. Notice that all trailing delimiters are ignored
\twocolitem{{\tt wxTOKEN\_RET\_EMPTY\_ALL}}{In this mode, empty trailing token in this mode, not just the last one, i.e. a string \texttt{"a::b::"} would
(after the last delimiter character) will be returned as well. The string as still result in the same set of tokens.}
above will contain four tokens: the already mentioned ones and another empty \twocolitem{{\tt wxTOKEN\_RET\_EMPTY\_ALL}}{In this mode, empty trailing tokens
one as the last one.} (including the one after the last delimiter character) will be returned as
well. The string \texttt{"a::b:"} will be tokenized into four tokens: the
already mentioned ones plus another empty one as the last one, and the string
\texttt{"a::b::"} will have five tokens.}
\twocolitem{{\tt wxTOKEN\_RET\_DELIMS}}{In this mode, the delimiter character \twocolitem{{\tt wxTOKEN\_RET\_DELIMS}}{In this mode, the delimiter character
after the end of the current token (there may be none if this is the last after the end of the current token (there may be none if this is the last
token) is returned appended to the token. Otherwise, it is the same mode as token) is returned appended to the token. Otherwise, it is the same mode as
{\tt wxTOKEN\_RET\_EMPTY}.} \texttt{wxTOKEN\_RET\_EMPTY}. Notice that there is no mode like this one but
behaving like \texttt{wxTOKEN\_RET\_EMPTY\_ALL} instead of
\texttt{wxTOKEN\_RET\_EMPTY}; use \texttt{wxTOKEN\_RET\_EMPTY\_ALL} together with
\helpref{GetLastDelimiter()}{wxstringtokenizergetlastdelimiter} to emulate it.}
\twocolitem{{\tt wxTOKEN\_STRTOK}}{In this mode the class behaves exactly like \twocolitem{{\tt wxTOKEN\_STRTOK}}{In this mode the class behaves exactly like
the standard {\tt strtok()} function. The empty tokens are never returned.} the standard {\tt strtok()} function: the empty tokens are never returned.}
\end{twocollist} \end{twocollist}
\wxheading{Derived from} \wxheading{Derived from}
@@ -103,9 +109,19 @@ reaches $0$ \helpref{HasMoreTokens}{wxstringtokenizerhasmoretokens} returns
Returns \true if the tokenizer has further tokens, \false if none are left. Returns \true if the tokenizer has further tokens, \false if none are left.
\membersection{wxStringTokenizer::GetLastDelimiter}\label{wxstringtokenizergetlastdelimiter}
\func{wxChar}{GetLastDelimiter}{\void}
Returns the delimiter which ended the scan for the last token returned by
\helpref{GetNextToken()}{wxstringtokenizergetnexttoken}, or \texttt{NUL} if
there have been no calls to this function yet or if it returned the trailing
empty token in \texttt{wxTOKEN\_RET\_EMPTY\_ALL} mode.
\membersection{wxStringTokenizer::GetNextToken}\label{wxstringtokenizergetnexttoken} \membersection{wxStringTokenizer::GetNextToken}\label{wxstringtokenizergetnexttoken}
\func{wxString}{GetNextToken}{\void} \constfunc{wxString}{GetNextToken}{\void}
Returns the next token or empty string if the end of string was reached. Returns the next token or empty string if the end of string was reached.

View File

@@ -58,12 +58,16 @@ public:
void Reinit(const wxString& str); void Reinit(const wxString& str);
// tokens access // tokens access
// count them // return the number of remaining tokens
size_t CountTokens() const; size_t CountTokens() const;
// did we reach the end of the string? // did we reach the end of the string?
bool HasMoreTokens() const; bool HasMoreTokens() const;
// get the next token, will return empty string if !HasMoreTokens() // get the next token, will return empty string if !HasMoreTokens()
wxString GetNextToken(); wxString GetNextToken();
// get the delimiter which terminated the token last retrieved by
// GetNextToken() or NUL if there had been no tokens yet or the last
// one wasn't terminated (but ran to the end of the string)
wxChar GetLastDelimiter() const { return m_lastDelim; }
// get current tokenizer state // get current tokenizer state
// returns the part of the string which remains to tokenize (*not* the // returns the part of the string which remains to tokenize (*not* the
@@ -79,6 +83,9 @@ public:
// get the current mode - can be different from the one passed to the // get the current mode - can be different from the one passed to the
// ctor if it was wxTOKEN_DEFAULT // ctor if it was wxTOKEN_DEFAULT
wxStringTokenizerMode GetMode() const { return m_mode; } wxStringTokenizerMode GetMode() const { return m_mode; }
// do we return empty tokens?
bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
// backwards compatibility section from now on // backwards compatibility section from now on
// ------------------------------------------- // -------------------------------------------
@@ -104,14 +111,14 @@ public:
protected: protected:
bool IsOk() const { return m_mode != wxTOKEN_INVALID; } bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
wxString m_string, // the (rest of) string to tokenize wxString m_string, // the string we tokenize
m_delims; // all delimiters m_delims; // all possible delimiters
size_t m_pos; // the position in the original string size_t m_pos; // the current position in m_string
wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values
bool m_hasMore; // do we have more (possible empty) tokens? wxChar m_lastDelim; // delimiter after last token or '\0'
}; };
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------

View File

@@ -86,9 +86,7 @@ void wxStringTokenizer::Reinit(const wxString& str)
m_string = str; m_string = str;
m_pos = 0; m_pos = 0;
m_lastDelim = _T('\0');
// empty string doesn't have any tokens
m_hasMore = !m_string.empty();
} }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@@ -100,49 +98,61 @@ bool wxStringTokenizer::HasMoreTokens() const
{ {
wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
if ( m_string.find_first_not_of(m_delims) == wxString::npos ) if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos )
{ {
// no non empty tokens left, but in 2 cases we still may return true if // there are non delimiter characters left, so we do have more tokens
// GetNextToken() wasn't called yet for this empty token:
//
// a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
// b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
// string containing only the delimiter: then there is an empty
// token just before it
return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
(m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
? m_hasMore : false;
}
else
{
// there are non delimiter characters left, hence we do have more
// tokens
return true; return true;
} }
switch ( m_mode )
{
case wxTOKEN_RET_EMPTY:
case wxTOKEN_RET_DELIMS:
// special hack for wxTOKEN_RET_EMPTY: we should return the initial
// empty token even if there are only delimiters after it
return m_pos == 0 && !m_string.empty();
case wxTOKEN_RET_EMPTY_ALL:
// special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
// already returned the trailing empty token after the last
// delimiter by examining m_lastDelim: it is set to NUL if we run
// up to the end of the string in GetNextToken(), but if it is not
// NUL yet we still have this last token to return even if m_pos is
// already at m_string.length()
return m_pos < m_string.length() || m_lastDelim != _T('\0');
case wxTOKEN_INVALID:
case wxTOKEN_DEFAULT:
wxFAIL_MSG( _T("unexpected tokenizer mode") );
// fall through
case wxTOKEN_STRTOK:
// never return empty tokens
break;
} }
// count the number of tokens in the string return false;
}
// count the number of (remaining) tokens in the string
size_t wxStringTokenizer::CountTokens() const size_t wxStringTokenizer::CountTokens() const
{ {
wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
// VZ: this function is IMHO not very useful, so it's probably not very // VZ: this function is IMHO not very useful, so it's probably not very
// important if it's implementation here is not as efficient as it // important if its implementation here is not as efficient as it
// could be - but OTOH like this we're sure to get the correct answer // could be -- but OTOH like this we're sure to get the correct answer
// in all modes // in all modes
wxStringTokenizer *self = (wxStringTokenizer *)this; // const_cast wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode);
wxString stringInitial = m_string;
size_t count = 0; size_t count = 0;
while ( self->HasMoreTokens() ) while ( tkz.HasMoreTokens() )
{ {
count++; count++;
(void)self->GetNextToken(); (void)tkz.GetNextToken();
} }
self->Reinit(stringInitial);
return count; return count;
} }
@@ -152,9 +162,6 @@ size_t wxStringTokenizer::CountTokens() const
wxString wxStringTokenizer::GetNextToken() wxString wxStringTokenizer::GetNextToken()
{ {
// strtok() doesn't return empty tokens, all other modes do
bool allowEmpty = m_mode != wxTOKEN_STRTOK;
wxString token; wxString token;
do do
{ {
@@ -162,40 +169,40 @@ wxString wxStringTokenizer::GetNextToken()
{ {
break; break;
} }
// find the end of this token // find the end of this token
size_t pos = m_string.find_first_of(m_delims); size_t pos = m_string.find_first_of(m_delims, m_pos);
// and the start of the next one // and the start of the next one
if ( pos == wxString::npos ) if ( pos == wxString::npos )
{ {
// no more delimiters, the token is everything till the end of // no more delimiters, the token is everything till the end of
// string // string
token = m_string; token.assign(m_string, m_pos, wxString::npos);
m_pos += m_string.length(); // skip the token
m_string.clear(); m_pos = m_string.length();
// no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL // it wasn't terminated
// mode (we will return the trailing one right now in this case) m_lastDelim = _T('\0');
m_hasMore = false;
} }
else else // we found a delimiter at pos
{ {
size_t pos2 = pos + 1;
// in wxTOKEN_RET_DELIMS mode we return the delimiter character // in wxTOKEN_RET_DELIMS mode we return the delimiter character
// with token // with token, otherwise leave it out
token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2 size_t len = pos - m_pos;
: pos); if ( m_mode == wxTOKEN_RET_DELIMS )
len++;
// remove token with the following it delimiter from string token.assign(m_string, m_pos, len);
m_string.erase(0, pos2);
// keep track of the position in the original string too // skip the token and the trailing delimiter
m_pos += pos2; m_pos = pos + 1;
m_lastDelim = m_string[pos];
} }
} }
while ( !allowEmpty && token.empty() ); while ( !AllowEmpty() && token.empty() );
return token; return token;
} }

View File

@@ -36,11 +36,13 @@ private:
CPPUNIT_TEST_SUITE( TokenizerTestCase ); CPPUNIT_TEST_SUITE( TokenizerTestCase );
CPPUNIT_TEST( GetCount ); CPPUNIT_TEST( GetCount );
CPPUNIT_TEST( GetPosition ); CPPUNIT_TEST( GetPosition );
CPPUNIT_TEST( LastDelimiter );
CPPUNIT_TEST( StrtokCompat ); CPPUNIT_TEST( StrtokCompat );
CPPUNIT_TEST_SUITE_END(); CPPUNIT_TEST_SUITE_END();
void GetCount(); void GetCount();
void GetPosition(); void GetPosition();
void LastDelimiter();
void StrtokCompat(); void StrtokCompat();
DECLARE_NO_COPY_CLASS(TokenizerTestCase) DECLARE_NO_COPY_CLASS(TokenizerTestCase)
@@ -184,6 +186,23 @@ void TokenizerTestCase::GetPosition()
DoTestGetPosition(_T("foo_bar_"), _T("_"), 4, 8, 0); DoTestGetPosition(_T("foo_bar_"), _T("_"), 4, 8, 0);
} }
// Checks that GetLastDelimiter() reports, after each GetNextToken() call, the
// delimiter character that terminated the token just returned, and NUL once
// the final token has run to the end of the string.
void TokenizerTestCase::LastDelimiter()
{
// no mode argument is passed, so the tokenizer uses its default mode; the
// assertions below expect the empty token between '+' and '-' to be returned
// (presumably the default behaves like wxTOKEN_RET_EMPTY for non-whitespace
// delimiters -- confirm against the tokenizer documentation)
wxStringTokenizer tkz(_T("a+-b=c"), _T("+-="));
// first token is terminated by '+'
tkz.GetNextToken();
CPPUNIT_ASSERT_EQUAL( _T('+'), tkz.GetLastDelimiter() );
// second token is the empty one between '+' and '-', terminated by '-'
tkz.GetNextToken();
CPPUNIT_ASSERT_EQUAL( _T('-'), tkz.GetLastDelimiter() );
// third token is terminated by '='
tkz.GetNextToken();
CPPUNIT_ASSERT_EQUAL( _T('='), tkz.GetLastDelimiter() );
// last token runs to the end of the string, so no delimiter terminated it
tkz.GetNextToken();
CPPUNIT_ASSERT_EQUAL( _T('\0'), tkz.GetLastDelimiter() );
}
void TokenizerTestCase::StrtokCompat() void TokenizerTestCase::StrtokCompat()
{ {
for ( size_t n = 0; n < WXSIZEOF(gs_testData); n++ ) for ( size_t n = 0; n < WXSIZEOF(gs_testData); n++ )