Add wxMBConv::GetMaxCharLen()

This is not used yet, but will be needed soon in order to determine
whether we have sufficiently many to decode them.
This commit is contained in:
Vadim Zeitlin
2019-04-21 20:04:52 +02:00
parent bd37af32b1
commit af83769bd0
2 changed files with 40 additions and 0 deletions

View File

@@ -32,6 +32,8 @@ class WXDLLIMPEXP_FWD_BASE wxString;
// don't let the fact that the existing classes implement MB2WC/WC2MB() instead // don't let the fact that the existing classes implement MB2WC/WC2MB() instead
// confuse you. // confuse you.
// //
// For many encodings you must override GetMaxCharLen().
//
// You also have to implement Clone() to allow copying the conversions // You also have to implement Clone() to allow copying the conversions
// polymorphically. // polymorphically.
// //
@@ -118,6 +120,10 @@ public:
wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); } wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); }
#endif // Unicode/ANSI #endif // Unicode/ANSI
// return the maximum number of bytes that can be required to encode a
// single character in this encoding, e.g. 4 for UTF-8
virtual size_t GetMaxCharLen() const { return 1; }
// this function is used in the implementation of cMB2WC() to distinguish // this function is used in the implementation of cMB2WC() to distinguish
// between the following cases: // between the following cases:
// //
@@ -254,6 +260,8 @@ public:
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF7; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF7; }
private: private:
@@ -341,6 +349,8 @@ public:
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvStrictUTF8(); } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvStrictUTF8(); }
// NB: other mapping modes are not, strictly speaking, UTF-8, so we can't // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
@@ -365,6 +375,8 @@ public:
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF8(m_options); } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF8(m_options); }
// NB: other mapping modes are not, strictly speaking, UTF-8, so we can't // NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
@@ -405,6 +417,7 @@ public:
const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16LE; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16LE; }
}; };
@@ -419,6 +432,7 @@ public:
const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16BE; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF16BE; }
}; };
@@ -451,6 +465,7 @@ public:
const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32LE; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32LE; }
}; };
@@ -465,6 +480,7 @@ public:
const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32BE; } virtual wxMBConv *Clone() const wxOVERRIDE { return new wxMBConvUTF32BE; }
}; };
@@ -566,6 +582,10 @@ public:
FromWChar(char *dst, size_t dstLen, FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
// Use the value for UTF-8 here to make sure we try to decode up to 4 bytes
// as UTF-8 before giving up.
virtual size_t GetMaxCharLen() const wxOVERRIDE { return 4; }
virtual wxMBConv *Clone() const wxOVERRIDE virtual wxMBConv *Clone() const wxOVERRIDE
{ {
return new wxWhateverWorksConv(); return new wxWhateverWorksConv();

View File

@@ -48,6 +48,26 @@ public:
*/ */
virtual wxMBConv* Clone() const = 0; virtual wxMBConv* Clone() const = 0;
/**
This function must be overridden in the derived classes to return the
maximum length, in bytes, of a single Unicode character representation
in this encoding.
As a consequence, the conversion object must be able to decode any
valid sequence of bytes in the corresponding encoding if it's at least
that many bytes long, but may fail if it is shorter. For example, for
UTF-8 the maximum character length is 4, as 3 bytes or less may be
insufficient to represent a Unicode character in UTF-8, but 4 are
always enough.
For compatibility reasons, this method is not pure virtual and returns
1 by default in the base class, however it should be always overridden
in the derived classes.
@since 3.1.3
*/
virtual size_t GetMaxCharLen() const;
/** /**
This function returns 1 for most of the multibyte encodings in which the This function returns 1 for most of the multibyte encodings in which the
string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for string is terminated by a single @c NUL, 2 for UTF-16 and 4 for UTF-32 for