Add wxMBConv::GetMaxCharLen()

This is not used yet, but will be needed soon in order to determine whether we have enough bytes to decode a complete character.
2019-04-21 20:04:52 +02:00
parent bd37af32b1
commit af83769bd0
2 changed files with 40 additions and 0 deletions
--- a/include/wx/strconv.h
+++ b/include/wx/strconv.h
@@ -32,6 +32,8 @@ class WXDLLIMPEXP_FWD_BASE wxString;
 // don't let the fact that the existing classes implement MB2WC/WC2MB() instead
 // confuse you.
 //
+// For many encodings you must override GetMaxCharLen().
+//
 // You also have to implement Clone() to allow copying the conversions
 // polymorphically.
 //
@@ -118,6 +120,10 @@ public:
    wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); }
 #endif // Unicode/ANSI

+    // return the maximum number of bytes that can be required to encode a
+    // single character in this encoding, e.g. 4 for UTF-8
+    virtual size_t GetMaxCharLen() const { return 1; }
+
    // this function is used in the implementation of cMB2WC() to distinguish
    // between the following cases:
    //
@@ -254,6 +260,8 @@ public:
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;

+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 5; } // '+', 3 base64 chars, '-'
+
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF7; }

 private:
@@ -341,6 +349,8 @@ public:
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;

+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
+
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvStrictUTF8(); }

    // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
@@ -365,6 +375,8 @@ public:
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;

+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
+
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF8(m_options); }

    // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
@@ -405,6 +417,7 @@ public:
                           const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16LE; }
 };

@@ -419,6 +432,7 @@ public:
                           const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16BE; }
 };

@@ -451,6 +465,7 @@ public:
                           const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32LE; }
 };

@@ -465,6 +480,7 @@ public:
                           const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32BE; }
 };

@@ -566,6 +582,10 @@ public:
    FromWChar(char *dst, size_t dstLen,
              const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;

+    // Use the value for UTF-8 here to make sure we try to decode up to 4 bytes
+    // as UTF-8 before giving up.
+    virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
+
    virtual wxMBConv *Clone() const wxOVERRIDE
    {
        return new wxWhateverWorksConv();
--- a/interface/wx/strconv.h
+++ b/interface/wx/strconv.h
@@ -48,6 +48,26 @@ public:
    */
    virtual wxMBConv* Clone() const = 0;

+    /**
+        This function must be overridden in the derived classes to return the
+        maximum length, in bytes, of a single Unicode character representation
+        in this encoding.
+
+        As a consequence, the conversion object must be able to decode any
+        valid sequence of bytes in the corresponding encoding if it's at least
+        that many bytes long, but may fail if it is shorter. For example, for
+        UTF-8 the maximum character length is 4, as 3 bytes or less may be
+        insufficient to represent a Unicode character in UTF-8, but 4 are
+        always enough.
+
+        For compatibility reasons, this method is not pure virtual and returns
+        1 by default in the base class, but it should always be overridden in
+        derived classes implementing multibyte encodings.
+
+        @since 3.1.3
+     */
+    virtual size_t GetMaxCharLen() const;
+
    /**
        This function returns 1 for most of the multibyte encodings in which the
        string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for