From d94f3f5aba86b0b8c7ef6bb10945dfc0bccdfe84 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 29 Jan 2014 22:25:14 +0000 Subject: [PATCH] Fix bug with non-NUL-terminated inputs in wxMBConvUTF8. We read beyond the provided maximal length as we didn't update the remaining length while parsing the remaining bytes of a UTF-8-encoded code point. Fix this and add a test for it. Closes #15901. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@75733 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- src/common/strconv.cpp | 10 ++++++++++ tests/strings/unicode.cpp | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index a433128011..6c671f0d9f 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -1272,6 +1272,14 @@ size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n, wxUint32 res = cc & (0x3f >> cnt); while (cnt--) { + if (!isNulTerminated && !srcLen) + { + // invalid UTF-8 sequence ending before the end of code + // point. + invalid = true; + break; + } + cc = *psz; if ((cc & 0xC0) != 0x80) { @@ -1281,6 +1289,8 @@ size_t wxMBConvUTF8::ToWChar(wchar_t *buf, size_t n, } psz++; + if (!isNulTerminated) + srcLen--; res = (res << 6) | (cc & 0x3f); } diff --git a/tests/strings/unicode.cpp b/tests/strings/unicode.cpp index 29b7777a2a..2fd2ad3a6a 100644 --- a/tests/strings/unicode.cpp +++ b/tests/strings/unicode.cpp @@ -352,6 +352,13 @@ void UnicodeTestCase::ConversionUTF8() d.Test(n, conv); d.Test(n, wxConvUTF8); } + + static const char* const u25a6 = "\xe2\x96\xa6"; + wxMBConvUTF8 c(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL); + CPPUNIT_ASSERT_EQUAL( 2, c.ToWChar(NULL, 0, u25a6, wxNO_LEN) ); + CPPUNIT_ASSERT_EQUAL( 0, c.ToWChar(NULL, 0, u25a6, 0) ); + CPPUNIT_ASSERT_EQUAL( 1, c.ToWChar(NULL, 0, u25a6, 3) ); + CPPUNIT_ASSERT_EQUAL( 2, c.ToWChar(NULL, 0, u25a6, 4) ); } void UnicodeTestCase::ConversionUTF16()