implemented UTF-16/32 using To/FromWChar() instead of MB2WC/WC2MB for sizeof(wchar_t)==4 platforms too

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38586 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2006-04-05 23:04:10 +00:00
parent 467e04791c
commit 35d11700a5
2 changed files with 188 additions and 170 deletions

View File

@@ -286,15 +286,10 @@ protected:
class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConvUTF16Base class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConvUTF16Base
{ {
public: public:
#if SIZEOF_WCHAR_T == 2
virtual size_t ToWChar(wchar_t *dst, size_t dstLen, virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen = wxNO_LEN) const; const char *src, size_t srcLen = wxNO_LEN) const;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const; const wchar_t *src, size_t srcLen = wxNO_LEN) const;
#else
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
#endif
virtual wxMBConv *Clone() const { return new wxMBConvUTF16LE; } virtual wxMBConv *Clone() const { return new wxMBConvUTF16LE; }
}; };
@@ -305,15 +300,10 @@ public:
class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConvUTF16Base class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConvUTF16Base
{ {
public: public:
#if SIZEOF_WCHAR_T == 2
virtual size_t ToWChar(wchar_t *dst, size_t dstLen, virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen = wxNO_LEN) const; const char *src, size_t srcLen = wxNO_LEN) const;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const; const wchar_t *src, size_t srcLen = wxNO_LEN) const;
#else
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
#endif
virtual wxMBConv *Clone() const { return new wxMBConvUTF16BE; } virtual wxMBConv *Clone() const { return new wxMBConvUTF16BE; }
}; };
@@ -342,15 +332,10 @@ protected:
class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConvUTF32Base class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConvUTF32Base
{ {
public: public:
#if SIZEOF_WCHAR_T == 2
virtual size_t ToWChar(wchar_t *dst, size_t dstLen, virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen = wxNO_LEN) const; const char *src, size_t srcLen = wxNO_LEN) const;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const; const wchar_t *src, size_t srcLen = wxNO_LEN) const;
#else
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
#endif
virtual wxMBConv *Clone() const { return new wxMBConvUTF32LE; } virtual wxMBConv *Clone() const { return new wxMBConvUTF32LE; }
}; };
@@ -361,15 +346,10 @@ public:
class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConvUTF32Base class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConvUTF32Base
{ {
public: public:
#if SIZEOF_WCHAR_T == 2
virtual size_t ToWChar(wchar_t *dst, size_t dstLen, virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen = wxNO_LEN) const; const char *src, size_t srcLen = wxNO_LEN) const;
virtual size_t FromWChar(char *dst, size_t dstLen, virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const; const wchar_t *src, size_t srcLen = wxNO_LEN) const;
#else
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
#endif
virtual wxMBConv *Clone() const { return new wxMBConvUTF32BE; } virtual wxMBConv *Clone() const { return new wxMBConvUTF32BE; }
}; };

View File

@@ -144,13 +144,17 @@ static size_t decode_utf16(const wxUint16* input, wxUint32& output)
} }
#ifdef WC_UTF16 #ifdef WC_UTF16
typedef wchar_t wxDecodeSurrogate_t;
#else // !WC_UTF16
typedef wxUint16 wxDecodeSurrogate_t;
#endif // WC_UTF16/!WC_UTF16
// returns the next UTF-32 character from the wchar_t buffer and advances the // returns the next UTF-32 character from the wchar_t buffer and advances the
// pointer to the character after this one // pointer to the character after this one
// //
// if an invalid character is found, *pSrc is set to NULL, the caller must // if an invalid character is found, *pSrc is set to NULL, the caller must
// check for this // check for this
static wxUint32 wxDecodeSurrogate(const wchar_t **pSrc) static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
{ {
wxUint32 out; wxUint32 out;
const size_t n = decode_utf16(*pSrc, out); const size_t n = decode_utf16(*pSrc, out);
@@ -162,8 +166,6 @@ static wxUint32 wxDecodeSurrogate(const wchar_t **pSrc)
return out; return out;
} }
#endif // WC_UTF16
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// wxMBConv // wxMBConv
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@@ -1071,130 +1073,163 @@ wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
// conversions without endianness change // conversions without endianness change
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const size_t
wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen) const
{ {
size_t len=0; srcLen = GetLength(src, srcLen);
if ( srcLen == wxNO_LEN )
return wxCONV_FAILED;
while (*(wxUint16*)psz && (!buf || len < n)) const size_t inLen = srcLen/BYTES_PER_CHAR;
if ( !dst )
{ {
wxUint32 cc; // optimization: return maximal space which could be needed for this
size_t pa=decode_utf16((wxUint16*)psz, cc); // string even if the real size could be smaller if the buffer contains
if (pa == wxCONV_FAILED) // any surrogates
return pa; return inLen;
if (buf)
*buf++ = (wchar_t)cc;
len++;
psz += pa * sizeof(wxUint16);
} }
if (buf && len<n) *buf=0;
return len; size_t outLen = 0;
const wxUint16 *in = wx_reinterpret_cast(const wxUint16 *, src);
for ( const wxUint16 * const inEnd = in + inLen; in < inEnd; )
{
const wxUint32 ch = wxDecodeSurrogate(&in);
if ( !in )
return wxCONV_FAILED;
if ( ++outLen > dstLen )
return wxCONV_FAILED;
*dst++ = ch;
} }
// copy 32bit String to 16bit MB return outLen;
size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const }
{
size_t len=0;
while (*psz && (!buf || len < n)) size_t
wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen) const
{
if ( srcLen == wxNO_LEN )
srcLen = wxWcslen(src) + 1;
size_t outLen = 0;
wxUint16 *out = wx_reinterpret_cast(wxUint16 *, dst);
for ( size_t n = 0; n < srcLen; n++ )
{ {
wxUint16 cc[2]; wxUint16 cc[2];
size_t pa=encode_utf16(*psz, cc); const size_t numChars = encode_utf16(*src++, cc);
if ( numChars == wxCONV_FAILED )
return wxCONV_FAILED;
if (pa == wxCONV_FAILED) outLen += numChars*BYTES_PER_CHAR;
return pa; if ( out )
if (buf)
{ {
*(wxUint16*)buf = cc[0]; if ( outLen > dstLen )
buf += sizeof(wxUint16); return wxCONV_FAILED;
if (pa > 1)
*out++ = cc[0];
if ( numChars == 2 )
{ {
*(wxUint16*)buf = cc[1]; // second character of a surrogate
buf += sizeof(wxUint16); *out++ = cc[1];
}
} }
} }
len += pa*sizeof(wxUint16); return outLen;
psz++;
}
if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
} }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// endian-reversing conversions // endian-reversing conversions
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// swap 16bit MB to 32bit String size_t
size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen) const
{ {
size_t len=0; srcLen = GetLength(src, srcLen);
if ( srcLen == wxNO_LEN )
return wxCONV_FAILED;
while (*(wxUint16*)psz && (!buf || len < n)) const size_t inLen = srcLen/BYTES_PER_CHAR;
if ( !dst )
{ {
wxUint32 cc; // optimization: return maximal space which could be needed for this
char tmp[4]; // string even if the real size could be smaller if the buffer contains
tmp[0]=psz[1]; tmp[1]=psz[0]; // any surrogates
tmp[2]=psz[3]; tmp[3]=psz[2]; return inLen;
size_t pa=decode_utf16((wxUint16*)tmp, cc);
if (pa == wxCONV_FAILED)
return pa;
if (buf)
*buf++ = (wchar_t)cc;
len++;
psz += pa * sizeof(wxUint16);
} }
if (buf && len<n) *buf=0;
return len; size_t outLen = 0;
const wxUint16 *in = wx_reinterpret_cast(const wxUint16 *, src);
for ( const wxUint16 * const inEnd = in + inLen; in < inEnd; )
{
wxUint32 ch;
wxUint16 tmp[2];
tmp[0] = wxUINT16_SWAP_ALWAYS(*in);
in++;
tmp[1] = wxUINT16_SWAP_ALWAYS(*in);
const size_t numChars = decode_utf16(tmp, ch);
if ( numChars == wxCONV_FAILED )
return wxCONV_FAILED;
if ( numChars == 2 )
in++;
if ( ++outLen > dstLen )
return wxCONV_FAILED;
*dst++ = ch;
} }
// swap 32bit String to 16bit MB return outLen;
size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const }
{
size_t len=0;
while (*psz && (!buf || len < n)) size_t
wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen) const
{
if ( srcLen == wxNO_LEN )
srcLen = wxWcslen(src) + 1;
size_t outLen = 0;
wxUint16 *out = wx_reinterpret_cast(wxUint16 *, dst);
for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
{ {
wxUint16 cc[2]; wxUint16 cc[2];
size_t pa=encode_utf16(*psz, cc); const size_t numChars = encode_utf16(*src, cc);
if ( numChars == wxCONV_FAILED )
return wxCONV_FAILED;
if (pa == wxCONV_FAILED) outLen += numChars*BYTES_PER_CHAR;
return pa; if ( out )
if (buf)
{ {
*buf++ = ((char*)cc)[1]; if ( outLen > dstLen )
*buf++ = ((char*)cc)[0]; return wxCONV_FAILED;
if (pa > 1)
*out++ = wxUINT16_SWAP_ALWAYS(cc[0]);
if ( numChars == 2 )
{ {
*buf++ = ((char*)cc)[3]; // second character of a surrogate
*buf++ = ((char*)cc)[2]; *out++ = wxUINT16_SWAP_ALWAYS(cc[1]);
}
} }
} }
len += pa*sizeof(wxUint16); return outLen;
psz++;
}
if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
} }
#endif // WC_UTF16/!WC_UTF16 #endif // WC_UTF16/!WC_UTF16
// ---------------------------------------------------------------------------- // ============================================================================
// UTF-32 // UTF-32
// ---------------------------------------------------------------------------- // ============================================================================
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight wxMBConvUTF32BE #define wxMBConvUTF32straight wxMBConvUTF32BE
@@ -1391,101 +1426,104 @@ wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
#else // !WC_UTF16: wchar_t is UTF-32 #else // !WC_UTF16: wchar_t is UTF-32
// copy 32bit MB to 32bit String // ----------------------------------------------------------------------------
size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const // conversions without endianness change
{ // ----------------------------------------------------------------------------
size_t len=0;
while (*(wxUint32*)psz && (!buf || len < n)) size_t
wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen) const
{ {
if (buf) // use memcpy() as it should be much faster than hand-written loop
*buf++ = (wchar_t)(*(wxUint32*)psz); srcLen = GetLength(src, srcLen);
len++; if ( srcLen == wxNO_LEN )
psz += sizeof(wxUint32); return wxCONV_FAILED;
const size_t inLen = srcLen/BYTES_PER_CHAR;
if ( dst )
{
if ( dstLen < inLen )
return wxCONV_FAILED;
memcpy(dst, src, srcLen);
} }
if (buf && len<n) return inLen;
*buf=0;
return len;
} }
size_t
// copy 32bit String to 32bit MB wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const const wchar_t *src, size_t srcLen) const
{ {
size_t len=0; if ( srcLen == wxNO_LEN )
srcLen = wxWcslen(src) + 1;
while (*psz && (!buf || len < n)) srcLen *= BYTES_PER_CHAR;
if ( dst )
{ {
if (buf) if ( dstLen < srcLen )
return wxCONV_FAILED;
memcpy(dst, src, srcLen);
}
return srcLen;
}
// ----------------------------------------------------------------------------
// endian-reversing conversions
// ----------------------------------------------------------------------------
size_t
wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen) const
{ {
*(wxUint32*)buf = *psz; srcLen = GetLength(src, srcLen);
buf += sizeof(wxUint32); if ( srcLen == wxNO_LEN )
} return wxCONV_FAILED;
len += sizeof(wxUint32); srcLen /= BYTES_PER_CHAR;
psz++;
}
if (buf && len<=n-sizeof(wxUint32)) if ( dst )
*(wxUint32*)buf=0;
return len;
}
// swap 32bit MB to 32bit String
size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{ {
size_t len=0; if ( dstLen < srcLen )
return wxCONV_FAILED;
while (*(wxUint32*)psz && (!buf || len < n)) const wxUint32 *in = wx_reinterpret_cast(const wxUint32 *, src);
for ( size_t n = 0; n < srcLen; n++, in++ )
{ {
if (buf) *dst++ = wxUINT32_SWAP_ALWAYS(*in);
}
}
return srcLen;
}
size_t
wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen) const
{ {
((char *)buf)[0] = psz[3]; if ( srcLen == wxNO_LEN )
((char *)buf)[1] = psz[2]; srcLen = wxWcslen(src) + 1;
((char *)buf)[2] = psz[1];
((char *)buf)[3] = psz[0];
buf++;
}
len++;
psz += sizeof(wxUint32);
}
if (buf && len<n) srcLen *= BYTES_PER_CHAR;
*buf=0;
return len; if ( dst )
}
// swap 32bit String to 32bit MB
size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{ {
size_t len=0; if ( dstLen < srcLen )
return wxCONV_FAILED;
while (*psz && (!buf || len < n)) wxUint32 *out = wx_reinterpret_cast(wxUint32 *, dst);
for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
{ {
if (buf) *out++ = wxUINT32_SWAP_ALWAYS(*src);
{
*buf++ = ((char *)psz)[3];
*buf++ = ((char *)psz)[2];
*buf++ = ((char *)psz)[1];
*buf++ = ((char *)psz)[0];
} }
len += sizeof(wxUint32);
psz++;
} }
if (buf && len<=n-sizeof(wxUint32)) return srcLen;
*(wxUint32*)buf=0;
return len;
} }
#endif // WC_UTF16/!WC_UTF16 #endif // WC_UTF16/!WC_UTF16