diff --git a/tests/mbconv/convautotest.cpp b/tests/mbconv/convautotest.cpp index cde714b109..91b940a35b 100644 --- a/tests/mbconv/convautotest.cpp +++ b/tests/mbconv/convautotest.cpp @@ -34,7 +34,9 @@ public: private: CPPUNIT_TEST_SUITE( ConvAutoTestCase ); + CPPUNIT_TEST( Init ); CPPUNIT_TEST( Empty ); + CPPUNIT_TEST( Encode ); CPPUNIT_TEST( Short ); CPPUNIT_TEST( None ); CPPUNIT_TEST( UTF32LE ); @@ -42,12 +44,17 @@ private: CPPUNIT_TEST( UTF16LE ); CPPUNIT_TEST( UTF16BE ); CPPUNIT_TEST( UTF8 ); + CPPUNIT_TEST( UTF8NoBom ); + CPPUNIT_TEST( Fallback ); + CPPUNIT_TEST( FallbackMultibyte ); CPPUNIT_TEST( StreamUTF8NoBOM ); CPPUNIT_TEST( StreamUTF8 ); CPPUNIT_TEST( StreamUTF16LE ); CPPUNIT_TEST( StreamUTF16BE ); CPPUNIT_TEST( StreamUTF32LE ); CPPUNIT_TEST( StreamUTF32BE ); + CPPUNIT_TEST( StreamFallback ); + CPPUNIT_TEST( StreamFallbackMultibyte ); CPPUNIT_TEST_SUITE_END(); // expected converter state, UTF-8 without BOM by default @@ -76,9 +83,13 @@ private: // // the length of the string may need to be passed explicitly if it has // embedded NULs, otherwise it's not necessary - void TestFirstChar(const char *src, wchar_t wch, size_t len = wxNO_LEN, ConvState st = ConvState()); + void TestFirstChar(const char *src, wchar_t wch, size_t len = wxNO_LEN, + ConvState st = ConvState(), + wxFontEncoding fe = wxFONTENCODING_DEFAULT); + void Init(); void Empty(); + void Encode(); void Short(); void None(); void UTF32LE(); @@ -86,12 +97,16 @@ private: void UTF16LE(); void UTF16BE(); void UTF8(); + void UTF8NoBom(); + void Fallback(); + void FallbackMultibyte(); // test whether two lines of text are converted properly from a stream void TestTextStream(const char *src, size_t srclength, const wxString& line1, - const wxString& line2); + const wxString& line2, + wxFontEncoding fe = wxFONTENCODING_DEFAULT); void StreamUTF8NoBOM(); void StreamUTF8(); @@ -99,6 +114,8 @@ private: void StreamUTF16BE(); void StreamUTF32LE(); void StreamUTF32BE(); + void StreamFallback(); + void StreamFallbackMultibyte(); }; // register in the unnamed registry so that these tests are run by default @@ -111,15 +128,21 @@ CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(ConvAutoTestCase, "ConvAutoTestCase"); // tests // ---------------------------------------------------------------------------- -void ConvAutoTestCase::TestFirstChar(const char *src, wchar_t wch, size_t len, ConvState st) +void ConvAutoTestCase::TestFirstChar(const char *src, wchar_t wch, size_t len, + ConvState st, wxFontEncoding fe) { - wxConvAuto conv; + wxConvAuto conv(fe); wxWCharBuffer wbuf = conv.cMB2WC(src, len, NULL); CPPUNIT_ASSERT( wbuf ); CPPUNIT_ASSERT_EQUAL( wch, *wbuf ); st.Check(conv); } +void ConvAutoTestCase::Init() +{ + ConvState(wxBOM_Unknown, wxFONTENCODING_MAX).Check(wxConvAuto()); +} + void ConvAutoTestCase::Empty() { wxConvAuto conv; @@ -127,6 +150,16 @@ void ConvAutoTestCase::Empty() ConvState(wxBOM_Unknown, wxFONTENCODING_MAX).Check(conv); } +void ConvAutoTestCase::Encode() +{ + wxConvAuto conv; + wxString str = wxString::FromUTF8("\xd0\x9f\xe3\x81\x82"); + wxCharBuffer buf = conv.cWC2MB(str.wc_str()); + CPPUNIT_ASSERT( buf ); + CPPUNIT_ASSERT_EQUAL( str, wxString::FromUTF8(buf) ); + ConvState(wxBOM_Unknown, wxFONTENCODING_UTF8).Check(conv); +} + void ConvAutoTestCase::Short() { TestFirstChar("1", wxT('1')); @@ -164,13 +197,39 @@ void ConvAutoTestCase::UTF8() #endif } +void ConvAutoTestCase::UTF8NoBom() +{ +#ifdef wxHAVE_U_ESCAPE + TestFirstChar("\xd0\x9f\xe3\x81\x82", L'\u041f', wxNO_LEN, ConvState(wxBOM_None, wxFONTENCODING_UTF8)); +#endif +} + +void ConvAutoTestCase::Fallback() +{ +#ifdef wxHAVE_U_ESCAPE + TestFirstChar("\xbf", L'\u041f', wxNO_LEN, + ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true), + wxFONTENCODING_ISO8859_5); +#endif +} + +void ConvAutoTestCase::FallbackMultibyte() +{ +#ifdef wxHAVE_U_ESCAPE + TestFirstChar("\x84\x50", L'\u041f', wxNO_LEN, + ConvState(wxBOM_None, wxFONTENCODING_CP932, true), + wxFONTENCODING_CP932); +#endif +} + void ConvAutoTestCase::TestTextStream(const char *src, size_t srclength, const wxString& line1, - const wxString& line2) + const wxString& line2, + wxFontEncoding fe) { wxMemoryInputStream instream(src, srclength); - wxTextInputStream text(instream); + wxTextInputStream text(instream, wxT(" \t"), wxConvAuto(fe)); CPPUNIT_ASSERT_EQUAL( line1, text.ReadLine() ); CPPUNIT_ASSERT_EQUAL( line2, text.ReadLine() ); @@ -191,16 +250,8 @@ const wxString line2 = wxString::FromUTF8("\xce\xb2"); void ConvAutoTestCase::StreamUTF8NoBOM() { - // currently this test doesn't work because without the BOM wxConvAuto - // decides that the string is in Latin-1 after finding the first (but not - // the two subsequent ones which are part of the same UTF-8 sequence!) - // 8-bit character - // - // FIXME: we need to fix this at wxTextInputStream level, see #11570 -#if 0 TestTextStream("\x61\xE3\x81\x82\x0A\xCE\xB2", 7, line1, line2); -#endif } void ConvAutoTestCase::StreamUTF8() @@ -235,4 +286,18 @@ void ConvAutoTestCase::StreamUTF32BE() 20, line1, line2); } +void ConvAutoTestCase::StreamFallback() +{ + // this only works if there are at least 3 bytes after the first non-ASCII character + TestTextStream("\x61\xbf\x0A\xe0\x7a", + 5, wxString::FromUTF8("a\xd0\x9f"), wxString::FromUTF8("\xd1\x80z"), + wxFONTENCODING_ISO8859_5); +} + +void ConvAutoTestCase::StreamFallbackMultibyte() +{ + TestTextStream("\x61\x82\xa0\x0A\x83\xc0", + 6, line1, line2, wxFONTENCODING_CP932); +} + #endif // wxUSE_UNICODE