fix parsing methods for non-ASCII strings (e.g. dates in non-"C" locales) (see #9560)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@59798 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2009-03-24 00:11:42 +00:00
parent 34d4d2864c
commit 66f22f4ae5
2 changed files with 183 additions and 149 deletions

View File

@@ -89,6 +89,9 @@ static const int MIN_PER_HOUR = 60;
// parsing helpers // parsing helpers
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
namespace
{
#ifdef HAVE_STRPTIME #ifdef HAVE_STRPTIME
#if wxUSE_UNIX && !defined(HAVE_STRPTIME_DECL) #if wxUSE_UNIX && !defined(HAVE_STRPTIME_DECL)
@@ -97,35 +100,31 @@ static const int MIN_PER_HOUR = 60;
extern "C" char *strptime(const char *, const char *, struct tm *); extern "C" char *strptime(const char *, const char *, struct tm *);
#endif #endif
// Unicode-friendly strptime() wrapper // strptime() wrapper: call strptime() for the string starting at the given
static const wxStringCharType * // iterator and fill output tm struct with the results and modify input to
CallStrptime(const wxStringCharType *input, const char *fmt, tm *tm) // point to the end of the string consumed by strptime() if successful,
// otherwise return false and don't modify anything
bool
CallStrptime(const wxString& str,
wxString::const_iterator& p,
const char *fmt,
tm *tm)
{ {
// the problem here is that strptime() returns pointer into the string we const char *start = str.mb_str();
// passed to it while we're really interested in the pointer into the start = wxStringOperations::AddToIter(start, p - str.begin());
// original, Unicode, string so we try to transform the pointer back
#if wxUSE_UNICODE_WCHAR
wxCharBuffer inputMB(wxConvertWX2MB(input));
#else // ASCII
const char * const inputMB = input;
#endif // Unicode/Ascii
const char *result = strptime(inputMB, fmt, tm); const char * const end = strptime(start, fmt, tm);
if ( !result ) if ( !end )
return NULL; return false;
#if wxUSE_UNICODE_WCHAR p += wxStringOperations::DiffIters(end, start);
// FIXME: this is wrong in presence of surrogates &c return true;
return input + (result - inputMB.data());
#else // ASCII
return result;
#endif // Unicode/Ascii
} }
#endif // HAVE_STRPTIME #endif // HAVE_STRPTIME
// return the month if the string is a month name or Inv_Month otherwise // return the month if the string is a month name or Inv_Month otherwise
static wxDateTime::Month GetMonthFromName(const wxString& name, int flags) wxDateTime::Month GetMonthFromName(const wxString& name, int flags)
{ {
wxDateTime::Month mon; wxDateTime::Month mon;
for ( mon = wxDateTime::Jan; mon < wxDateTime::Inv_Month; wxNextMonth(mon) ) for ( mon = wxDateTime::Jan; mon < wxDateTime::Inv_Month; wxNextMonth(mon) )
@@ -155,7 +154,7 @@ static wxDateTime::Month GetMonthFromName(const wxString& name, int flags)
} }
// return the weekday if the string is a weekday name or Inv_WeekDay otherwise // return the weekday if the string is a weekday name or Inv_WeekDay otherwise
static wxDateTime::WeekDay GetWeekDayFromName(const wxString& name, int flags) wxDateTime::WeekDay GetWeekDayFromName(const wxString& name, int flags)
{ {
wxDateTime::WeekDay wd; wxDateTime::WeekDay wd;
for ( wd = wxDateTime::Sun; wd < wxDateTime::Inv_WeekDay; wxNextWDay(wd) ) for ( wd = wxDateTime::Sun; wd < wxDateTime::Inv_WeekDay; wxNextWDay(wd) )
@@ -185,8 +184,8 @@ static wxDateTime::WeekDay GetWeekDayFromName(const wxString& name, int flags)
} }
// scans all digits (but no more than len) and returns the resulting number // scans all digits (but no more than len) and returns the resulting number
static bool GetNumericToken(size_t len, bool GetNumericToken(size_t len,
const wxStringCharType*& p, wxString::const_iterator& p,
unsigned long *number) unsigned long *number)
{ {
size_t n = 1; size_t n = 1;
@@ -203,7 +202,7 @@ static bool GetNumericToken(size_t len,
} }
// scans all alphabetic characters and returns the resulting string // scans all alphabetic characters and returns the resulting string
static wxString GetAlphaToken(const wxStringCharType*& p) wxString GetAlphaToken(wxString::const_iterator& p)
{ {
wxString s; wxString s;
while ( wxIsalpha(*p) ) while ( wxIsalpha(*p) )
@@ -214,6 +213,36 @@ static wxString GetAlphaToken(const wxStringCharType*& p)
return s; return s;
} }
// Tries to parse the text in the range [p, end) with fmt and then, in order,
// with each of the non-empty fall back formats fmtAlt and fmtAlt2, stopping
// at the first one which matches.
//
// On success p is moved forward by the number of characters consumed by the
// matching format and the parsed wxDateTime is returned. If none of the
// formats match, p is left alone and the wxDateTime object is returned as is
// (callers are expected to check it with IsValid()).
wxDateTime
ParseFormatAt(wxString::const_iterator& p,
              const wxString::const_iterator& end,
              const wxString& fmt,
              const wxString& fmtAlt = wxString(),
              const wxString& fmtAlt2 = wxString())
{
    // ParseFormat() works on whole strings, so copy the unparsed tail
    const wxString remaining(p, end);

    wxString::const_iterator stop;
    wxDateTime parsed;

    // the main format is always tried, the alternatives only if specified
    bool matched = false;
    if ( parsed.ParseFormat(remaining, fmt, &stop) )
    {
        matched = true;
    }
    else if ( !fmtAlt.empty() && parsed.ParseFormat(remaining, fmtAlt, &stop) )
    {
        matched = true;
    }
    else if ( !fmtAlt2.empty() && parsed.ParseFormat(remaining, fmtAlt2, &stop) )
    {
        matched = true;
    }

    if ( matched )
    {
        // stop points into our temporary copy, translate it to an offset
        p += stop - remaining.begin();
    }

    return parsed;
}
} // anonymous namespace
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// wxDateTime to/from text representations // wxDateTime to/from text representations
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@@ -1089,7 +1118,7 @@ wxDateTime::ParseFormat(const wxString& date,
wxDateTime::Month mon = Inv_Month; wxDateTime::Month mon = Inv_Month;
int year = 0; int year = 0;
const wxStringCharType *input = date.wx_str(); wxString::const_iterator input = date.begin();
for ( wxString::const_iterator fmt = format.begin(); fmt != format.end(); ++fmt ) for ( wxString::const_iterator fmt = format.begin(); fmt != format.end(); ++fmt )
{ {
if ( *fmt != _T('%') ) if ( *fmt != _T('%') )
@@ -1185,24 +1214,14 @@ wxDateTime::ParseFormat(const wxString& date,
case _T('c'): // locale default date and time representation case _T('c'): // locale default date and time representation
{ {
wxDateTime dt;
const wxString inc(input);
// NOTE: %c is locale-dependent; try strptime
#ifdef HAVE_STRPTIME #ifdef HAVE_STRPTIME
struct tm tm; struct tm tm;
// try using strptime() -- it may fail even if the input is // try using strptime() -- it may fail even if the input is
// correct but the date is out of range, so we will fall back // correct but the date is out of range, so we will fall back
// to our generic code anyhow // to our generic code anyhow
const wxStringCharType * if ( CallStrptime(date, input, "%c", &tm) )
result = CallStrptime(input, "%c", &tm);
if ( result )
{ {
haveDay = haveMon = haveYear =
haveHour = haveMin = haveSec = true;
hour = tm.tm_hour; hour = tm.tm_hour;
min = tm.tm_min; min = tm.tm_min;
sec = tm.tm_sec; sec = tm.tm_sec;
@@ -1210,29 +1229,26 @@ wxDateTime::ParseFormat(const wxString& date,
year = 1900 + tm.tm_year; year = 1900 + tm.tm_year;
mon = (Month)tm.tm_mon; mon = (Month)tm.tm_mon;
mday = tm.tm_mday; mday = tm.tm_mday;
input = result; // proceed where strptime() ended
} }
else else // strptime() failed; try generic heuristic code
{
// strptime() failed; try generic heuristic code
#endif // HAVE_STRPTIME #endif // HAVE_STRPTIME
// try the format which corresponds to ctime() output first
wxString::const_iterator endc;
if ( !dt.ParseFormat(inc, wxS("%a %b %d %H:%M:%S %Y"), &endc) &&
!dt.ParseFormat(inc, wxS("%x %X"), &endc) &&
!dt.ParseFormat(inc, wxS("%X %x"), &endc) )
{ {
// we've tried everything and still no match
// try the format which corresponds to ctime() output
// first, then the generic date/time formats
const wxDateTime dt = ParseFormatAt
(
input,
date.end(),
wxS("%a %b %d %H:%M:%S %Y"),
wxS("%x %X"),
wxS("%X %x")
);
if ( !dt.IsValid() )
return NULL; return NULL;
}
Tm tm = dt.GetTm(); Tm tm = dt.GetTm();
haveDay = haveMon = haveYear =
haveHour = haveMin = haveSec = true;
hour = tm.hour; hour = tm.hour;
min = tm.min; min = tm.min;
sec = tm.sec; sec = tm.sec;
@@ -1240,11 +1256,10 @@ wxDateTime::ParseFormat(const wxString& date,
year = tm.year; year = tm.year;
mon = tm.mon; mon = tm.mon;
mday = tm.mday; mday = tm.mday;
input += endc - inc.begin();
#ifdef HAVE_STRPTIME
} }
#endif // HAVE_STRPTIME
haveDay = haveMon = haveYear =
haveHour = haveMin = haveSec = true;
} }
break; break;
@@ -1355,12 +1370,9 @@ wxDateTime::ParseFormat(const wxString& date,
case _T('r'): // time as %I:%M:%S %p case _T('r'): // time as %I:%M:%S %p
{ {
wxDateTime dt; wxDateTime dt;
input = dt.ParseFormat(input, wxS("%I:%M:%S %p")); if ( !dt.ParseFormat(wxString(input, date.end()),
if ( !input ) wxS("%I:%M:%S %p"), &input) )
{
// no match
return NULL; return NULL;
}
haveHour = haveMin = haveSec = true; haveHour = haveMin = haveSec = true;
@@ -1373,15 +1385,13 @@ wxDateTime::ParseFormat(const wxString& date,
case _T('R'): // time as %H:%M case _T('R'): // time as %H:%M
{ {
wxDateTime dt; const wxDateTime
input = dt.ParseFormat(input, wxS("%H:%M")); dt = ParseFormatAt(input, date.end(), wxS("%H:%M"));
if ( !input ) if ( !dt.IsValid() )
{
// no match
return NULL; return NULL;
}
haveHour = haveMin = true; haveHour =
haveMin = true;
Tm tm = dt.GetTm(); Tm tm = dt.GetTm();
hour = tm.hour; hour = tm.hour;
@@ -1402,15 +1412,14 @@ wxDateTime::ParseFormat(const wxString& date,
case _T('T'): // time as %H:%M:%S case _T('T'): // time as %H:%M:%S
{ {
wxDateTime dt; const wxDateTime
input = dt.ParseFormat(input, _T("%H:%M:%S")); dt = ParseFormatAt(input, date.end(), wxS("%H:%M:%S"));
if ( !input ) if ( !dt.IsValid() )
{
// no match
return NULL; return NULL;
}
haveHour = haveMin = haveSec = true; haveHour =
haveMin =
haveSec = true;
Tm tm = dt.GetTm(); Tm tm = dt.GetTm();
hour = tm.hour; hour = tm.hour;
@@ -1438,12 +1447,8 @@ wxDateTime::ParseFormat(const wxString& date,
{ {
struct tm tm; struct tm tm;
const wxStringCharType * if ( CallStrptime(date, input, "%x", &tm) )
result = CallStrptime(input, "%x", &tm);
if ( result )
{ {
input = result;
haveDay = haveMon = haveYear = true; haveDay = haveMon = haveYear = true;
year = 1900 + tm.tm_year; year = 1900 + tm.tm_year;
@@ -1456,7 +1461,6 @@ wxDateTime::ParseFormat(const wxString& date,
#endif // HAVE_STRPTIME #endif // HAVE_STRPTIME
{ {
wxDateTime dt;
wxString fmtDate, wxString fmtDate,
fmtDateAlt; fmtDateAlt;
@@ -1480,17 +1484,11 @@ wxDateTime::ParseFormat(const wxString& date,
} }
} }
const wxString indate(input); const wxDateTime
wxString::const_iterator endDate; dt = ParseFormatAt(input, date.end(),
if ( !dt.ParseFormat(indate, fmtDate, &endDate) ) fmtDate, fmtDateAlt);
{ if ( !dt.IsValid() )
// try another one if we have it
if ( fmtDateAlt.empty() ||
!dt.ParseFormat(indate, fmtDateAlt, &endDate) )
{
return NULL; return NULL;
}
}
Tm tm = dt.GetTm(); Tm tm = dt.GetTm();
@@ -1501,8 +1499,6 @@ wxDateTime::ParseFormat(const wxString& date,
year = tm.year; year = tm.year;
mon = tm.mon; mon = tm.mon;
mday = tm.mday; mday = tm.mday;
input += endDate - indate.begin();
} }
break; break;
@@ -1512,11 +1508,8 @@ wxDateTime::ParseFormat(const wxString& date,
{ {
// use strptime() to do it for us (FIXME !Unicode friendly) // use strptime() to do it for us (FIXME !Unicode friendly)
struct tm tm; struct tm tm;
input = CallStrptime(input, "%X", &tm); if ( !CallStrptime(date, input, "%X", &tm) )
if ( !input )
{
return NULL; return NULL;
}
haveHour = haveMin = haveSec = true; haveHour = haveMin = haveSec = true;
@@ -1534,18 +1527,10 @@ wxDateTime::ParseFormat(const wxString& date,
// common cases // common cases
wxDateTime dt; wxDateTime dt;
const wxStringCharType * const wxDateTime
result = dt.ParseFormat(input, wxS("%T")); dt = ParseFormatAt(input, date.end(), "%T", "%r");
if ( !result ) if ( !dt.IsValid() )
{
result = dt.ParseFormat(input, wxS("%r"));
}
if ( !result )
{
// no match
return NULL; return NULL;
}
haveHour = haveHour =
haveMin = haveMin =
@@ -1555,8 +1540,6 @@ wxDateTime::ParseFormat(const wxString& date,
hour = tm.hour; hour = tm.hour;
min = tm.min; min = tm.min;
sec = tm.sec; sec = tm.sec;
input = result;
} }
#endif // HAVE_STRPTIME/!HAVE_STRPTIME #endif // HAVE_STRPTIME/!HAVE_STRPTIME
break; break;
@@ -1705,11 +1688,10 @@ wxDateTime::ParseFormat(const wxString& date,
return NULL; return NULL;
} }
const size_t endpos = input - date.wx_str();
if ( end ) if ( end )
*end = date.begin() + endpos; *end = input;
return date.c_str() + endpos; return date.c_str() + (input - date.begin());
} }
const char * const char *
@@ -1773,7 +1755,9 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end)
// "today" (for wxDate compatibility) and digits only otherwise (and not // "today" (for wxDate compatibility) and digits only otherwise (and not
// all esoteric constructions ParseDateTime() knows about) // all esoteric constructions ParseDateTime() knows about)
const wxStringCharType *p = date.wx_str(); const wxString::const_iterator pBegin = date.begin();
wxString::const_iterator p = pBegin;
while ( wxIsspace(*p) ) while ( wxIsspace(*p) )
p++; p++;
@@ -1793,13 +1777,13 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end)
{ {
const wxString dateStr = wxGetTranslation(literalDates[n].str); const wxString dateStr = wxGetTranslation(literalDates[n].str);
size_t len = dateStr.length(); size_t len = dateStr.length();
if ( wxStrlen(p) >= len )
{ const wxString::const_iterator pEnd = p + len;
wxString str(p, len); if ( wxString(p, pEnd).CmpNoCase(dateStr) == 0 )
if ( str.CmpNoCase(dateStr) == 0 )
{ {
// nothing can follow this, so stop here // nothing can follow this, so stop here
p += len;
p = pEnd;
int dayDiffFromToday = literalDates[n].dayDiffFromToday; int dayDiffFromToday = literalDates[n].dayDiffFromToday;
*this = Today(); *this = Today();
@@ -1808,12 +1792,11 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end)
*this += wxDateSpan::Days(dayDiffFromToday); *this += wxDateSpan::Days(dayDiffFromToday);
} }
const size_t endpos = p - date.wx_str();
if ( end ) if ( end )
*end = date.begin() + endpos; *end = pEnd;
return date.c_str() + endpos;
} return wxStringOperations::AddToIter(date.c_str().AsChar(),
pEnd - pBegin);
} }
} }
@@ -1837,7 +1820,7 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end)
// tokenize the string // tokenize the string
size_t nPosCur = 0; size_t nPosCur = 0;
static const wxStringCharType *dateDelimiters = wxS(".,/-\t\r\n "); static const wxStringCharType *dateDelimiters = wxS(".,/-\t\r\n ");
wxStringTokenizer tok(p, dateDelimiters); wxStringTokenizer tok(wxString(p, date.end()), dateDelimiters);
while ( tok.HasMoreTokens() ) while ( tok.HasMoreTokens() )
{ {
wxString token = tok.GetNextToken(); wxString token = tok.GetNextToken();
@@ -2117,11 +2100,10 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end)
p--; p--;
} }
const size_t endpos = p - date.wx_str();
if ( end ) if ( end )
*end = date.begin() + endpos; *end = p;
return date.c_str() + endpos; return wxStringOperations::AddToIter(date.c_str().AsChar(), p - pBegin);
} }
const char * const char *

View File

@@ -92,10 +92,64 @@ public:
private: private:
const char * const m_locOld; const char * const m_locOld;
wxDECLARE_NO_COPY_CLASS(CLocaleSetter); wxDECLARE_NO_COPY_CLASS(CLocaleSetter);
}; };
// Test helper: returns a copy of str in which the English abbreviated week
// day and month names are replaced with the abbreviated names returned by
// wxDateTime::GetWeekDayName() and wxDateTime::GetMonthName().
static wxString TranslateDate(const wxString& str)
{
    // English abbreviations, indexed by wxDateTime::WeekDay and
    // wxDateTime::Month values respectively
    static const char *englishDays[] =
    {
        "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
    };

    static const char *englishMonths[] =
    {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
        "Nov", "Dec"
    };

    // shortcut: a string without any letters can't contain any names, so
    // return it unchanged
    bool hasLetters = false;
    for ( wxString::const_iterator it = str.begin(); it != str.end(); ++it )
    {
        if ( isalpha(*it) )
        {
            hasLetters = true;
            break;
        }
    }

    if ( !hasLetters )
        return str;

    wxString result(str);

    // week days are substituted first, months second
    wxDateTime::WeekDay day = wxDateTime::Sun;
    while ( day < wxDateTime::Inv_WeekDay )
    {
        const wxString localDay =
            wxDateTime::GetWeekDayName(day, wxDateTime::Name_Abbr);
        result.Replace(englishDays[day], localDay);

        wxNextWDay(day);
    }

    wxDateTime::Month month = wxDateTime::Jan;
    while ( month < wxDateTime::Inv_Month )
    {
        const wxString localMonth =
            wxDateTime::GetMonthName(month, wxDateTime::Name_Abbr);
        result.Replace(englishMonths[month], localMonth);

        wxNextMonth(month);
    }

    return result;
}
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// broken down date representation used for testing // broken down date representation used for testing
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@@ -918,13 +972,9 @@ void DateTimeTestCase::TestDateParse()
CPPUNIT_ASSERT( dt.ParseDate(_T("today")) ); CPPUNIT_ASSERT( dt.ParseDate(_T("today")) );
CPPUNIT_ASSERT_EQUAL( wxDateTime::Today(), dt ); CPPUNIT_ASSERT_EQUAL( wxDateTime::Today(), dt );
// the other test strings use "C" locale so set it for the duration of this
// test
CLocaleSetter cloc;
for ( size_t n = 0; n < WXSIZEOF(parseTestDates); n++ ) for ( size_t n = 0; n < WXSIZEOF(parseTestDates); n++ )
{ {
const char * const datestr = parseTestDates[n].str; const wxString datestr = TranslateDate(parseTestDates[n].str);
const char * const end = dt.ParseDate(datestr); const char * const end = dt.ParseDate(datestr);
if ( end && !*end ) if ( end && !*end )
@@ -1033,15 +1083,17 @@ void DateTimeTestCase::TestDateTimeParse()
{ 22, wxDateTime::Nov, 2007, 19, 40, 0}, true }, { 22, wxDateTime::Nov, 2007, 19, 40, 0}, true },
}; };
// the test strings use "C" locale so set it for the duration of this test // the test strings here use "PM" which is not available in all locales so
// we need to use "C" locale for them
CLocaleSetter cloc; CLocaleSetter cloc;
wxDateTime dt; wxDateTime dt;
for ( size_t n = 0; n < WXSIZEOF(parseTestDates); n++ ) for ( size_t n = 0; n < WXSIZEOF(parseTestDates); n++ )
{ {
const char * const datestr = parseTestDates[n].str; const wxString datestr = TranslateDate(parseTestDates[n].str);
if ( dt.ParseDateTime(datestr) ) const char * const end = dt.ParseDateTime(datestr);
if ( end && !*end )
{ {
WX_ASSERT_MESSAGE( WX_ASSERT_MESSAGE(
("Erroneously parsed \"%s\"", datestr), ("Erroneously parsed \"%s\"", datestr),