From 18063f38ffb44e7b6f38f36ebdbaa5984c98529d Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Thu, 2 Nov 2017 01:02:17 +0100 Subject: [PATCH 1/5] Update comment describing wxMBConv::cMB2WC() and cWC2MB() Don't say that these functions return NULL pointers when they don't return pointers at all any more (and since quite some time). --- include/wx/strconv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/wx/strconv.h b/include/wx/strconv.h index d816bc43d9..d2f6fe4a8c 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -71,8 +71,8 @@ public: const wchar_t *src, size_t srcLen = wxNO_LEN) const; - // Convenience functions for translating NUL-terminated strings: returns - // the buffer containing the converted string or NULL pointer if the + // Convenience functions for translating NUL-terminated strings: return + // the buffer containing the converted string or empty buffer if the // conversion failed. const wxWCharBuffer cMB2WC(const char *in) const; const wxCharBuffer cWC2MB(const wchar_t *in) const; From 6e7254a7a12efba2e4d85048c0208d3c2ad0005c Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Thu, 2 Nov 2017 01:04:02 +0100 Subject: [PATCH 2/5] Move both wxMBConv::cXX2YY() overloads together No real changes, just group the overloads in a more logical way. --- include/wx/strconv.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/include/wx/strconv.h b/include/wx/strconv.h index d2f6fe4a8c..a916349f60 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -77,6 +77,10 @@ public: const wxWCharBuffer cMB2WC(const char *in) const; const wxCharBuffer cWC2MB(const wchar_t *in) const; + const wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const; + const wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const; + + // Convenience functions for converting strings which may contain embedded // NULs and don't have to be NUL-terminated. // @@ -97,12 +101,6 @@ public: const wxCharBuffer cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const; - // And yet more convenience functions for converting the entire buffers: - // these are the simplest and least error-prone as you never need to bother - // with lengths/sizes directly. - const wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const; - const wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const; - // convenience functions for converting MB or WC to/from wxWin default #if wxUSE_UNICODE const wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); } From b3fe07942f731cfebb8270b2fea2dd8eab66ae45 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Thu, 2 Nov 2017 01:07:00 +0100 Subject: [PATCH 3/5] Remove top level "const" from wxMBConv methods return values This "const" is useless and doesn't actually do anything, remove it to avoid confusion. --- include/wx/strconv.h | 20 ++++++++++---------- interface/wx/strconv.h | 16 ++++++++-------- src/common/strconv.cpp | 12 ++++++------ 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/include/wx/strconv.h b/include/wx/strconv.h index a916349f60..8f5569356d 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -74,11 +74,11 @@ public: // Convenience functions for translating NUL-terminated strings: return // the buffer containing the converted string or empty buffer if the // conversion failed. - const wxWCharBuffer cMB2WC(const char *in) const; - const wxCharBuffer cWC2MB(const wchar_t *in) const; + wxWCharBuffer cMB2WC(const char *in) const; + wxCharBuffer cWC2MB(const wchar_t *in) const; - const wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const; - const wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const; + wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const; + wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const; // Convenience functions for converting strings which may contain embedded @@ -96,22 +96,22 @@ public: // number of characters converted, whether the last one of them was NUL or // not. But if inLen == wxNO_LEN then outLen doesn't account for the last // NUL even though it is present. - const wxWCharBuffer + wxWCharBuffer cMB2WC(const char *in, size_t inLen, size_t *outLen) const; - const wxCharBuffer + wxCharBuffer cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const; // convenience functions for converting MB or WC to/from wxWin default #if wxUSE_UNICODE - const wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); } - const wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); } + wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); } + wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); } const wchar_t* cWC2WX(const wchar_t *psz) const { return psz; } const wchar_t* cWX2WC(const wchar_t *psz) const { return psz; } #else // ANSI const char* cMB2WX(const char *psz) const { return psz; } const char* cWX2MB(const char *psz) const { return psz; } - const wxCharBuffer cWC2WX(const wchar_t *psz) const { return cWC2MB(psz); } - const wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); } + wxCharBuffer cWC2WX(const wchar_t *psz) const { return cWC2MB(psz); } + wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); } #endif // Unicode/ANSI // this function is used in the implementation of cMB2WC() to distinguish diff --git a/interface/wx/strconv.h b/interface/wx/strconv.h index 7bd65e2587..111a07ce54 100644 --- a/interface/wx/strconv.h +++ b/interface/wx/strconv.h @@ -190,7 +190,7 @@ public: invalid and @a outLen is set to 0 (and not @c wxCONV_FAILED for compatibility concerns). */ - const wxWCharBuffer cMB2WC(const char* in, + wxWCharBuffer cMB2WC(const char* in, size_t inLen, size_t *outLen) const; @@ -209,7 +209,7 @@ public: @since 2.9.1 */ - const wxWCharBuffer cMB2WC(const wxCharBuffer& buf) const; + wxWCharBuffer cMB2WC(const wxCharBuffer& buf) const; //@{ /** @@ -221,7 +221,7 @@ public: is defined as the correct return type (without const). */ const char* cMB2WX(const char* psz) const; - const wxWCharBuffer cMB2WX(const char* psz) const; + wxWCharBuffer cMB2WX(const char* psz) const; //@} /** @@ -234,7 +234,7 @@ public: Its parameters have the same meaning as the corresponding parameters of FromWChar(), please see the description of cMB2WC() for more details. */ - const wxCharBuffer cWC2MB(const wchar_t* in, + wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen, size_t *outLen) const; @@ -253,7 +253,7 @@ public: @since 2.9.1 */ - const wxCharBuffer cWC2MB(const wxWCharBuffer& buf) const; + wxCharBuffer cWC2MB(const wxWCharBuffer& buf) const; //@{ /** @@ -264,7 +264,7 @@ public: defined as the correct return type (without const). */ const wchar_t* cWC2WX(const wchar_t* psz) const; - const wxCharBuffer cWC2WX(const wchar_t* psz) const; + wxCharBuffer cWC2WX(const wchar_t* psz) const; //@} //@{ @@ -276,7 +276,7 @@ public: is defined as the correct return type (without const). */ const char* cWX2MB(const wxChar* psz) const; - const wxCharBuffer cWX2MB(const wxChar* psz) const; + wxCharBuffer cWX2MB(const wxChar* psz) const; //@} //@{ @@ -288,7 +288,7 @@ public: defined as the correct return type (without const). */ const wchar_t* cWX2WC(const wxChar* psz) const; - const wxWCharBuffer cWX2WC(const wxChar* psz) const; + wxWCharBuffer cWX2WC(const wxChar* psz) const; //@} /** diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 17fb7233c4..e22f22bbea 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -399,7 +399,7 @@ wxMBConv::~wxMBConv() // nothing to do here (necessary for Darwin linking probably) } -const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const +wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const { if ( psz ) { @@ -419,7 +419,7 @@ const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const return wxWCharBuffer(); } -const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const +wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const { if ( pwz ) { @@ -435,7 +435,7 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const return wxCharBuffer(); } -const wxWCharBuffer +wxWCharBuffer wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const { const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen); @@ -470,7 +470,7 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const return wxWCharBuffer(); } -const wxCharBuffer +wxCharBuffer wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const { size_t dstLen = FromWChar(NULL, 0, inBuff, inLen); @@ -511,7 +511,7 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const return wxCharBuffer(); } -const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const +wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const { const size_t srcLen = buf.length(); if ( srcLen ) @@ -529,7 +529,7 @@ const wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const return wxScopedWCharBuffer::CreateNonOwned(L"", 0); } -const wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const +wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const { const size_t srcLen = wbuf.length(); if ( srcLen ) From 8bf239f8e4b0d9cec2b13c9c9cd8a0af60eb81dc Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Thu, 2 Nov 2017 01:27:24 +0100 Subject: [PATCH 4/5] Make wxMBConv dtor inline The Darwin linking problem mentioned in the comment doesn't exist in any of the still supported macOS versions, so it doesn't make sense to continue working around it. --- include/wx/strconv.h | 2 +- src/common/strconv.cpp | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/include/wx/strconv.h b/include/wx/strconv.h index 8f5569356d..6822ca62d1 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -160,7 +160,7 @@ public: virtual wxMBConv *Clone() const = 0; // virtual dtor for any base class - virtual ~wxMBConv(); + virtual ~wxMBConv() { } }; // ---------------------------------------------------------------------------- diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index e22f22bbea..94ff70d1b0 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -394,11 +394,6 @@ size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) cons return rc; } -wxMBConv::~wxMBConv() -{ - // nothing to do here (necessary for Darwin linking probably) -} - wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const { if ( psz ) From c47acbeb523420fc954308609bb1d6ccf476bd9f Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Thu, 2 Nov 2017 01:19:01 +0100 Subject: [PATCH 5/5] Fix wxMBConv::cWC2MB() and cMB2WC() returned buffer length This commit refactors the overloads of cMB2WC() and cWC2MB() methods taking raw pointers and buffers to reuse the same code and fixes the wrong length of the buffer returned by cWC2MB(wchar_t*) overload for conversions using multiple bytes to represent the NUL terminator character (it previously was wrong for UTF-16 and UTF-32 conversions due to wrongly subtracting 1 from the length when creating it instead of correctly subtracting GetMBNulLen()) and the wrong length of the buffer returned from cMB2WC(char*) overload where no adjustment for the trailing NUL was done at all. Also return simple default-constructed buffers from these methods in case of failure instead of using wxScopedCharBuffer::CreateNonOwned() which is less obvious and less efficient (even if the latter probably doesn't matter here because it's only done in case of an error). Finally, add tests checking that using WC2MB() or either of cWC2MB() overloads returns the buffers of the same length and with the same contents. --- include/wx/strconv.h | 17 ++++++--- src/common/strconv.cpp | 69 ++++++++++++++----------------------- tests/mbconv/mbconvtest.cpp | 43 +++++++++++++++++++++++ 3 files changed, 81 insertions(+), 48 deletions(-) diff --git a/include/wx/strconv.h b/include/wx/strconv.h index 6822ca62d1..207a235605 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -74,11 +74,15 @@ public: // Convenience functions for translating NUL-terminated strings: return // the buffer containing the converted string or empty buffer if the // conversion failed. - wxWCharBuffer cMB2WC(const char *in) const; - wxCharBuffer cWC2MB(const wchar_t *in) const; + wxWCharBuffer cMB2WC(const char *in) const + { return DoConvertMB2WC(in, wxNO_LEN); } + wxCharBuffer cWC2MB(const wchar_t *in) const + { return DoConvertWC2MB(in, wxNO_LEN); } - wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const; - wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const; + wxWCharBuffer cMB2WC(const wxScopedCharBuffer& in) const + { return DoConvertMB2WC(in, in.length()); } + wxCharBuffer cWC2MB(const wxScopedWCharBuffer& in) const + { return DoConvertWC2MB(in, in.length()); } // Convenience functions for converting strings which may contain embedded @@ -161,6 +165,11 @@ public: // virtual dtor for any base class virtual ~wxMBConv() { } + +private: + // Common part of single argument cWC2MB() and cMB2WC() overloads above. + wxCharBuffer DoConvertWC2MB(const wchar_t* pwz, size_t srcLen) const; + wxWCharBuffer DoConvertMB2WC(const char* psz, size_t srcLen) const; }; // ---------------------------------------------------------------------------- diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 94ff70d1b0..11d1adbc54 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -394,42 +394,6 @@ size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) cons return rc; } -wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const -{ - if ( psz ) - { - // calculate the length of the buffer needed first - const size_t nLen = ToWChar(NULL, 0, psz); - if ( nLen != wxCONV_FAILED ) - { - // now do the actual conversion - wxWCharBuffer buf(nLen - 1 /* +1 added implicitly */); - - // +1 for the trailing NULL - if ( ToWChar(buf.data(), nLen, psz) != wxCONV_FAILED ) - return buf; - } - } - - return wxWCharBuffer(); -} - -wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const -{ - if ( pwz ) - { - const size_t nLen = FromWChar(NULL, 0, pwz); - if ( nLen != wxCONV_FAILED ) - { - wxCharBuffer buf(nLen - 1); - if ( FromWChar(buf.data(), nLen, pwz) != wxCONV_FAILED ) - return buf; - } - } - - return wxCharBuffer(); -} - wxWCharBuffer wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const { @@ -506,10 +470,13 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const return wxCharBuffer(); } -wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const +wxWCharBuffer wxMBConv::DoConvertMB2WC(const char* buf, size_t srcLen) const { - const size_t srcLen = buf.length(); - if ( srcLen ) + // Notice that converting NULL pointer should work, i.e. return an empty + // buffer instead of crashing, so we need to check both the length and the + // pointer because length is wxNO_LEN if it's a raw pointer and doesn't + // come from wxScopedCharBuffer. + if ( srcLen && buf ) { const size_t dstLen = ToWChar(NULL, 0, buf, srcLen); if ( dstLen != wxCONV_FAILED ) @@ -517,17 +484,24 @@ wxWCharBuffer wxMBConv::cMB2WC(const wxScopedCharBuffer& buf) const wxWCharBuffer wbuf(dstLen); wbuf.data()[dstLen] = L'\0'; if ( ToWChar(wbuf.data(), dstLen, buf, srcLen) != wxCONV_FAILED ) + { + // If the input string was NUL-terminated, we shouldn't include + // the length of the trailing NUL into the length of the return + // value. + if ( srcLen == wxNO_LEN ) + wbuf.shrink(dstLen - 1); + return wbuf; + } } } - return wxScopedWCharBuffer::CreateNonOwned(L"", 0); + return wxWCharBuffer(); } -wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const +wxCharBuffer wxMBConv::DoConvertWC2MB(const wchar_t* wbuf, size_t srcLen) const { - const size_t srcLen = wbuf.length(); - if ( srcLen ) + if ( srcLen && wbuf ) { const size_t dstLen = FromWChar(NULL, 0, wbuf, srcLen); if ( dstLen != wxCONV_FAILED ) @@ -535,11 +509,18 @@ wxCharBuffer wxMBConv::cWC2MB(const wxScopedWCharBuffer& wbuf) const wxCharBuffer buf(dstLen); buf.data()[dstLen] = '\0'; if ( FromWChar(buf.data(), dstLen, wbuf, srcLen) != wxCONV_FAILED ) + { + // As above, in DoConvertMB2WC(), except that the length of the + // trailing NUL is variable in this case. + if ( srcLen == wxNO_LEN ) + buf.shrink(dstLen - GetMBNulLen()); + return buf; + } } } - return wxScopedCharBuffer::CreateNonOwned("", 0); + return wxCharBuffer(); } // ---------------------------------------------------------------------------- diff --git a/tests/mbconv/mbconvtest.cpp b/tests/mbconv/mbconvtest.cpp index 124a9008a3..0b2b1507c6 100644 --- a/tests/mbconv/mbconvtest.cpp +++ b/tests/mbconv/mbconvtest.cpp @@ -874,6 +874,16 @@ void MBConvTestCase::BufSize() CPPUNIT_ASSERT( convUTF16.WC2MB(buf.data(), utf16text, lenMB + 3) != wxCONV_FAILED ); CPPUNIT_ASSERT_EQUAL( '?', buf[lenMB + 2] ); + + // Test cWC2MB() too. + const wxCharBuffer buf2 = convUTF16.cWC2MB(utf16text); + CHECK( buf2.length() == lenMB ); + CHECK( memcmp(buf, buf2, lenMB) == 0 ); + + const wxWCharBuffer utf16buf = wxWCharBuffer::CreateNonOwned(utf16text); + const wxCharBuffer buf3 = convUTF16.cWC2MB(utf16buf); + CHECK( buf3.length() == lenMB ); + CHECK( memcmp(buf, buf3, lenMB) == 0 ); } void MBConvTestCase::FromWCharTests() @@ -1448,3 +1458,36 @@ void MBConvTestCase::UTF8(const char *charSequence, } #endif // HAVE_WCHAR_H + +TEST_CASE("wxMBConv::cWC2MB", "[mbconv][wc2mb]") +{ + wxMBConvUTF16 convUTF16; + + CHECK( convUTF16.cWC2MB(L"").length() == 0 ); + CHECK( convUTF16.cWC2MB(wxWCharBuffer()).length() == 0 ); + CHECK( convUTF16.cWC2MB(L"Hi").length() == 4 ); + CHECK( convUTF16.cWC2MB(wxWCharBuffer::CreateNonOwned(L"Hi")).length() == 4 ); + + CHECK( wxConvUTF7.cWC2MB(L"").length() == 0 ); + CHECK( wxConvUTF7.cWC2MB(wxWCharBuffer()).length() == 0 ); + CHECK( wxConvUTF7.cWC2MB(L"\xa3").length() == 5 ); + // This test currently fails, the returned value is 3 because the + // conversion object doesn't return to its unshifted state -- which is + // probably a bug in wxMBConvUTF7. + // TODO: fix it there and reenable the test. + CHECK_NOFAIL( wxConvUTF7.cWC2MB(wxWCharBuffer::CreateNonOwned(L"\xa3")).length() == 5 ); +} + +TEST_CASE("wxMBConv::cMB2WC", "[mbconv][mb2wc]") +{ + wxMBConvUTF16 convUTF16; + + CHECK( convUTF16.cMB2WC("\0").length() == 0 ); + CHECK( convUTF16.cMB2WC(wxCharBuffer()).length() == 0 ); + CHECK( convUTF16.cMB2WC("H\0i\0").length() == 2 ); + CHECK( convUTF16.cMB2WC(wxCharBuffer::CreateNonOwned("H\0i\0", 4)).length() == 2 ); + + CHECK( wxConvUTF7.cMB2WC("").length() == 0 ); + CHECK( wxConvUTF7.cMB2WC(wxCharBuffer()).length() == 0 ); + CHECK( wxConvUTF7.cMB2WC("+AKM-").length() == 1 ); +}