From af83769bd097b1416d8837ca0b30bfaa7e3bb21f Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Sun, 21 Apr 2019 20:04:52 +0200 Subject: [PATCH] Add wxMBConv::GetMaxCharLen() This is not used yet, but will be needed soon in order to determine whether we have sufficiently many bytes to decode them. --- include/wx/strconv.h | 20 ++++++++++++++++++++ interface/wx/strconv.h | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/wx/strconv.h b/include/wx/strconv.h index 207a235605..d344a5a911 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -32,6 +32,8 @@ class WXDLLIMPEXP_FWD_BASE wxString; // don't let the fact that the existing classes implement MB2WC/WC2MB() instead // confuse you. // +// For many encodings you must override GetMaxCharLen(). +// // You also have to implement Clone() to allow copying the conversions // polymorphically. // @@ -118,6 +120,10 @@ public: wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); } #endif // Unicode/ANSI + // return the maximum number of bytes that can be required to encode a + // single character in this encoding, e.g. 
4 for UTF-8 + virtual size_t GetMaxCharLen() const { return 1; } + // this function is used in the implementation of cMB2WC() to distinguish // between the following cases: // @@ -254,6 +260,8 @@ public: virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } + virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF7; } private: @@ -341,6 +349,8 @@ public: virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } + virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvStrictUTF8(); } // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't @@ -365,6 +375,8 @@ public: virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } + virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF8(m_options); } // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't @@ -405,6 +417,7 @@ public: const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16LE; } }; @@ -419,6 +432,7 @@ public: const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16BE; } }; @@ -451,6 +465,7 @@ public: const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; virtual size_t 
FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32LE; } }; @@ -465,6 +480,7 @@ public: const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32BE; } }; @@ -566,6 +582,10 @@ public: FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + // Use the value for UTF-8 here to make sure we try to decode up to 4 bytes + // as UTF-8 before giving up. + virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; } + virtual wxMBConv *Clone() const wxOVERRIDE { return new wxWhateverWorksConv(); diff --git a/interface/wx/strconv.h b/interface/wx/strconv.h index 00dfb359e6..7a2055495f 100644 --- a/interface/wx/strconv.h +++ b/interface/wx/strconv.h @@ -48,6 +48,26 @@ public: */ virtual wxMBConv* Clone() const = 0; + /** + This function must be overridden in the derived classes to return the + maximum length, in bytes, of a single Unicode character representation + in this encoding. + + As a consequence, the conversion object must be able to decode any + valid sequence of bytes in the corresponding encoding if it's at least + that many bytes long, but may fail if it is shorter. For example, for + UTF-8 the maximum character length is 4, as 3 bytes or less may be + insufficient to represent a Unicode character in UTF-8, but 4 are + always enough. + + For compatibility reasons, this method is not pure virtual and returns + 1 by default in the base class, however it should always be overridden + in the derived classes. 
+ + @since 3.1.3 + */ + virtual size_t GetMaxCharLen() const; + /** This function returns 1 for most of the multibyte encodings in which the string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for