Merge wchar_t-surrogates branch

Fix several problems with surrogates in UTF-16-encoded wchar_t strings:
correct bugs in UTF-8 and UTF-32 conversion code and add surrogates support to
wxText{Input,Output}Stream classes.

Closes #17070.
This commit is contained in:
Vadim Zeitlin
2015-11-13 20:35:01 +01:00
5 changed files with 249 additions and 16 deletions

View File

@@ -87,7 +87,16 @@ protected:
#if wxUSE_UNICODE
wxMBConv *m_conv;
#endif
// The second half of a surrogate pair when using UTF-16 for wchar_t:
// we can't return it immediately from GetChar() when we read a Unicode
// code point outside of the BMP, but we can't keep it in m_lastBytes
// either because it can't be decoded separately, so we have a separate
// 1-wchar_t buffer just for this case.
#if SIZEOF_WCHAR_T == 2
wchar_t m_lastWChar;
#endif // SIZEOF_WCHAR_T == 2
#endif // wxUSE_UNICODE
bool EatEOL(const wxChar &c);
void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
@@ -165,7 +174,13 @@ protected:
#if wxUSE_UNICODE
wxMBConv *m_conv;
#endif
#if SIZEOF_WCHAR_T == 2
// The first half of a surrogate pair, if one was passed to PutChar() the
// last time it was called and couldn't be output at that point.
wchar_t m_lastWChar;
#endif // SIZEOF_WCHAR_T == 2
#endif // wxUSE_UNICODE
wxDECLARE_NO_COPY_CLASS(wxTextOutputStream);
};