Fix wxConvAuto behaviour when it is used by wxTextInputStream.

wxConvAuto implicitly assumed that the chunk of data passed to it for
translation was big enough to allow it to at least detect the BOM.
However, this isn't necessarily the case, and it never is with
wxTextInputStream, which reads the bytes one by one.

Fix this by waiting until we have enough data to be able to detect the BOM.
This still doesn't fix the problem with streams without a BOM, and the
corresponding unit test still fails -- it will need to be fixed at the level
of wxTextInputStream itself later, but correctly handling the cases when a
BOM is present is already better than before.

See #11570.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@63064 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2010-01-04 12:22:49 +00:00
parent 55e5154d2c
commit 4cb0e8d05c
5 changed files with 213 additions and 59 deletions

View File

@@ -19,11 +19,11 @@
#if wxUSE_WCHAR_T
#ifndef WX_PRECOMP
#endif // WX_PRECOMP
#include "wx/convauto.h"
#include "wx/mstream.h"
#include "wx/txtstrm.h"
// ----------------------------------------------------------------------------
// test class
// ----------------------------------------------------------------------------
@@ -43,6 +43,12 @@ private:
CPPUNIT_TEST( UTF16LE );
CPPUNIT_TEST( UTF16BE );
CPPUNIT_TEST( UTF8 );
// the Stream* tests read their input through wxTextInputStream instead of
// converting a whole buffer at once
CPPUNIT_TEST( StreamUTF8NoBOM );
CPPUNIT_TEST( StreamUTF8 );
CPPUNIT_TEST( StreamUTF16LE );
CPPUNIT_TEST( StreamUTF16BE );
CPPUNIT_TEST( StreamUTF32LE );
CPPUNIT_TEST( StreamUTF32BE );
CPPUNIT_TEST_SUITE_END();
// real test function: check that converting the src multibyte string to
@@ -57,6 +63,19 @@ private:
void UTF16LE();
void UTF16BE();
void UTF8();
// test whether two lines of text are converted properly from a stream
//
// src points to srclength raw bytes which are wrapped in a memory stream
// and read back through wxTextInputStream; line1 and line2 are the values
// expected from the first and the second ReadLine() call respectively
void TestTextStream(const char *src,
size_t srclength,
const wxString& line1,
const wxString& line2);
// each of the following tests passes the same two lines of text, encoded
// differently, to TestTextStream()
void StreamUTF8NoBOM();
void StreamUTF8();
void StreamUTF16LE();
void StreamUTF16BE();
void StreamUTF32LE();
void StreamUTF32BE();
};
// register in the unnamed registry so that these tests are run by default
@@ -118,5 +137,76 @@ void ConvAutoTestCase::UTF8()
#endif
}
void ConvAutoTestCase::TestTextStream(const char *src,
size_t srclength,
const wxString& line1,
const wxString& line2)
{
wxMemoryInputStream instream(src, srclength);
wxTextInputStream text(instream);
CPPUNIT_ASSERT_EQUAL( line1, text.ReadLine() );
CPPUNIT_ASSERT_EQUAL( line2, text.ReadLine() );
}
// Expected contents of the test text used by all the Stream*() functions
// below:
//  - the first line is an 'a' followed by the Japanese hiragana A (U+3042)
//  - the second line is a single Greek beta (U+03B2)
// There is no blank line at the end.
namespace
{

const wxString line1(wxString::FromUTF8("a\xe3\x81\x82"));
const wxString line2(wxString::FromUTF8("\xce\xb2"));

} // anonymous namespace
void ConvAutoTestCase::StreamUTF8NoBOM()
{
    // This test is disabled for now: when there is no BOM, wxConvAuto
    // concludes that the input is Latin-1 as soon as it sees the first 8-bit
    // character, without looking at the two following ones which belong to
    // the same UTF-8 sequence.
    //
    // FIXME: we need to fix this at wxTextInputStream level, see #11570
#if 0
    // same text as in StreamUTF8() but without the leading BOM
    static const char utf8NoBOMData[] = "\x61\xE3\x81\x82\x0A\xCE\xB2";

    // sizeof() counts the terminating NUL which is not part of the input
    TestTextStream(utf8NoBOMData, sizeof(utf8NoBOMData) - 1, line1, line2);
#endif
}
void ConvAutoTestCase::StreamUTF8()
{
    // the test text encoded in UTF-8 and preceded by the EF BB BF BOM
    static const char utf8Data[] = "\xEF\xBB\xBF\x61\xE3\x81\x82\x0A\xCE\xB2";

    // sizeof() counts the terminating NUL which is not part of the input
    TestTextStream(utf8Data, sizeof(utf8Data) - 1, line1, line2);
}
void ConvAutoTestCase::StreamUTF16LE()
{
    // the test text as 16-bit units, least significant byte first, preceded
    // by the FF FE BOM
    static const char utf16leData[] = "\xFF\xFE\x61\x00\x42\x30\x0A\x00\xB2\x03";

    // sizeof() counts the terminating NUL which is not part of the input
    TestTextStream(utf16leData, sizeof(utf16leData) - 1, line1, line2);
}
void ConvAutoTestCase::StreamUTF16BE()
{
    // the test text as 16-bit units, most significant byte first, preceded
    // by the FE FF BOM
    static const char utf16beData[] = "\xFE\xFF\x00\x61\x30\x42\x00\x0A\x03\xB2";

    // sizeof() counts the terminating NUL which is not part of the input
    TestTextStream(utf16beData, sizeof(utf16beData) - 1, line1, line2);
}
void ConvAutoTestCase::StreamUTF32LE()
{
    // the test text as 32-bit units, least significant byte first, preceded
    // by the FF FE 00 00 BOM
    static const char utf32leData[] =
        "\xFF\xFE\0\0\x61\x00\0\0\x42\x30\0\0\x0A"
        "\x00\0\0\xB2\x03\0\0";

    // sizeof() includes the embedded NUL bytes, only the terminating one has
    // to be excluded
    TestTextStream(utf32leData, sizeof(utf32leData) - 1, line1, line2);
}
void ConvAutoTestCase::StreamUTF32BE()
{
    // the test text as 32-bit units, most significant byte first, preceded
    // by the 00 00 FE FF BOM
    static const char utf32beData[] =
        "\0\0\xFE\xFF\0\0\x00\x61\0\0\x30\x42\0\0\x00\x0A"
        "\0\0\x03\xB2";

    // sizeof() includes the embedded NUL bytes, only the terminating one has
    // to be excluded
    TestTextStream(utf32beData, sizeof(utf32beData) - 1, line1, line2);
}
#endif // wxUSE_WCHAR_T