Fix wxConvAuto behaviour when it is used by wxTextInputStream.
wxConvAuto implicitly assumed that the chunk of data passed to it for translation was big enough to allow it to at least detect the BOM from it. However, this isn't necessarily the case, and it never is with wxTextInputStream, which reads the bytes one by one. Fix this by waiting until we have enough data to be able to detect the BOM. This still doesn't fix the problem with streams without a BOM, and the corresponding unit test still fails -- it will need to be fixed at the level of wxTextInputStream itself later, but correctly handling the cases when a BOM is present is already better than before. See #11570. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@63064 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -19,11 +19,11 @@
|
||||
|
||||
#if wxUSE_WCHAR_T
|
||||
|
||||
#ifndef WX_PRECOMP
|
||||
#endif // WX_PRECOMP
|
||||
|
||||
#include "wx/convauto.h"
|
||||
|
||||
#include "wx/mstream.h"
|
||||
#include "wx/txtstrm.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// test class
|
||||
// ----------------------------------------------------------------------------
|
||||
@@ -43,6 +43,12 @@ private:
|
||||
CPPUNIT_TEST( UTF16LE );
|
||||
CPPUNIT_TEST( UTF16BE );
|
||||
CPPUNIT_TEST( UTF8 );
|
||||
CPPUNIT_TEST( StreamUTF8NoBOM );
|
||||
CPPUNIT_TEST( StreamUTF8 );
|
||||
CPPUNIT_TEST( StreamUTF16LE );
|
||||
CPPUNIT_TEST( StreamUTF16BE );
|
||||
CPPUNIT_TEST( StreamUTF32LE );
|
||||
CPPUNIT_TEST( StreamUTF32BE );
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
|
||||
// real test function: check that converting the src multibyte string to
|
||||
@@ -57,6 +63,19 @@ private:
|
||||
void UTF16LE();
|
||||
void UTF16BE();
|
||||
void UTF8();
|
||||
|
||||
// test whether two lines of text are converted properly from a stream
|
||||
void TestTextStream(const char *src,
|
||||
size_t srclength,
|
||||
const wxString& line1,
|
||||
const wxString& line2);
|
||||
|
||||
void StreamUTF8NoBOM();
|
||||
void StreamUTF8();
|
||||
void StreamUTF16LE();
|
||||
void StreamUTF16BE();
|
||||
void StreamUTF32LE();
|
||||
void StreamUTF32BE();
|
||||
};
|
||||
|
||||
// register in the unnamed registry so that these tests are run by default
|
||||
@@ -118,5 +137,76 @@ void ConvAutoTestCase::UTF8()
|
||||
#endif
|
||||
}
|
||||
|
||||
void ConvAutoTestCase::TestTextStream(const char *src,
|
||||
size_t srclength,
|
||||
const wxString& line1,
|
||||
const wxString& line2)
|
||||
{
|
||||
wxMemoryInputStream instream(src, srclength);
|
||||
wxTextInputStream text(instream);
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL( line1, text.ReadLine() );
|
||||
CPPUNIT_ASSERT_EQUAL( line2, text.ReadLine() );
|
||||
}
|
||||
|
||||
// the first line of the teststring used in the following functions is an
|
||||
// 'a' followed by a Japanese hiragana A (u+3042).
|
||||
// The second line is a single Greek beta (u+03B2). There is no blank line
|
||||
// at the end.
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
const wxString line1 = wxString::FromUTF8("a\xe3\x81\x82");
|
||||
const wxString line2 = wxString::FromUTF8("\xce\xb2");
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// UTF-8 input without any BOM: the check itself is currently compiled out.
void ConvAutoTestCase::StreamUTF8NoBOM()
{
    // This test can't pass at the moment: when there is no BOM, wxConvAuto
    // settles on Latin-1 as soon as it sees the first 8-bit character, without
    // having seen the two following ones belonging to the same UTF-8 sequence.
    //
    // FIXME: we need to fix this at wxTextInputStream level, see #11570
#if 0
    static const char utf8NoBom[] =
        "\x61\xE3\x81\x82"  // 'a', U+3042
        "\x0A"              // line break
        "\xCE\xB2";         // U+03B2

    // sizeof() counts the terminating NUL, which is not part of the input
    TestTextStream(utf8NoBom, sizeof(utf8NoBom) - 1, line1, line2);
#endif
}
|
||||
|
||||
void ConvAutoTestCase::StreamUTF8()
|
||||
{
|
||||
TestTextStream("\xEF\xBB\xBF\x61\xE3\x81\x82\x0A\xCE\xB2",
|
||||
10, line1, line2);
|
||||
}
|
||||
|
||||
void ConvAutoTestCase::StreamUTF16LE()
|
||||
{
|
||||
TestTextStream("\xFF\xFE\x61\x00\x42\x30\x0A\x00\xB2\x03",
|
||||
10, line1, line2);
|
||||
}
|
||||
|
||||
void ConvAutoTestCase::StreamUTF16BE()
|
||||
{
|
||||
TestTextStream("\xFE\xFF\x00\x61\x30\x42\x00\x0A\x03\xB2",
|
||||
10, line1, line2);
|
||||
}
|
||||
|
||||
void ConvAutoTestCase::StreamUTF32LE()
|
||||
{
|
||||
TestTextStream("\xFF\xFE\0\0\x61\x00\0\0\x42\x30\0\0\x0A"
|
||||
"\x00\0\0\xB2\x03\0\0",
|
||||
20, line1, line2);
|
||||
}
|
||||
|
||||
void ConvAutoTestCase::StreamUTF32BE()
|
||||
{
|
||||
TestTextStream("\0\0\xFE\xFF\0\0\x00\x61\0\0\x30\x42\0\0\x00\x0A"
|
||||
"\0\0\x03\xB2",
|
||||
20, line1, line2);
|
||||
}
|
||||
|
||||
#endif // wxUSE_WCHAR_T
|
||||
|
||||
|
Reference in New Issue
Block a user