added new To/FromWChar() API with more reasonable semantics than old MB2WC/WC2MB; for now both coexist and the change is/should be backwards compatible
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38541 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -147,6 +147,175 @@ static size_t decode_utf16(const wxUint16* input, wxUint32& output)
|
||||
// wxMBConv
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
size_t
|
||||
wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
|
||||
const char *src, size_t srcLen) const
|
||||
{
|
||||
// although new conversion classes are supposed to implement this function
|
||||
// directly, the existins ones only implement the old MB2WC() and so, to
|
||||
// avoid to have to rewrite all conversion classes at once, we provide a
|
||||
// default (but not efficient) implementation of this one in terms of the
|
||||
// old function by copying the input to ensure that it's NUL-terminated and
|
||||
// then using MB2WC() to convert it
|
||||
|
||||
// the number of chars [which would be] written to dst [if it were not NULL]
|
||||
size_t dstWritten = 0;
|
||||
|
||||
// the number of NULs terminating this string
|
||||
size_t nulLen wxDUMMY_INITIALIZE(0);
|
||||
|
||||
// if we were not given the input size we just have to assume that the
|
||||
// string is properly terminated as we have no way of knowing how long it
|
||||
// is anyhow, but if we do have the size check whether there are enough
|
||||
// NULs at the end
|
||||
wxCharBuffer bufTmp;
|
||||
const char *srcEnd;
|
||||
if ( srcLen != (size_t)-1 )
|
||||
{
|
||||
// we need to know how to find the end of this string
|
||||
nulLen = GetMBNulLen();
|
||||
if ( nulLen == wxCONV_FAILED )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
// if there are enough NULs we can avoid the copy
|
||||
if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
|
||||
{
|
||||
// make a copy in order to properly NUL-terminate the string
|
||||
bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
|
||||
char * const p = bufTmp.data();
|
||||
memcpy(p, src, srcLen);
|
||||
for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
|
||||
*s = '\0';
|
||||
|
||||
src = bufTmp;
|
||||
}
|
||||
|
||||
srcEnd = src + srcLen;
|
||||
}
|
||||
else // quit after the first loop iteration
|
||||
{
|
||||
srcEnd = NULL;
|
||||
}
|
||||
|
||||
for ( ;; )
|
||||
{
|
||||
// try to convert the current chunk
|
||||
size_t lenChunk = MB2WC(NULL, src, 0);
|
||||
if ( lenChunk == 0 )
|
||||
{
|
||||
// nothing left in the input string, conversion succeeded
|
||||
break;
|
||||
}
|
||||
|
||||
if ( lenChunk == wxCONV_FAILED )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
// if we already have a previous chunk, leave the NUL separating it
|
||||
// from this one
|
||||
if ( dstWritten )
|
||||
{
|
||||
dstWritten++;
|
||||
if ( dst )
|
||||
dst++;
|
||||
}
|
||||
|
||||
dstWritten += lenChunk;
|
||||
|
||||
if ( dst )
|
||||
{
|
||||
if ( dstWritten > dstLen )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
|
||||
if ( lenChunk == wxCONV_FAILED )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
dst += lenChunk;
|
||||
}
|
||||
|
||||
if ( !srcEnd )
|
||||
{
|
||||
// we convert the entire string in this cas, as we suppose that the
|
||||
// string is NUL-terminated and so srcEnd is not used at all
|
||||
break;
|
||||
}
|
||||
|
||||
// advance the input pointer past the end of this chunk
|
||||
while ( NotAllNULs(src, nulLen) )
|
||||
{
|
||||
// notice that we must skip over multiple bytes here as we suppose
|
||||
// that if NUL takes 2 or 4 bytes, then all the other characters do
|
||||
// too and so if advanced by a single byte we might erroneously
|
||||
// detect sequences of NUL bytes in the middle of the input
|
||||
src += nulLen;
|
||||
}
|
||||
|
||||
src += nulLen; // skipping over its terminator as well
|
||||
|
||||
// note that ">=" (and not just "==") is needed here as the terminator
|
||||
// we skipped just above could be inside or just after the buffer
|
||||
// delimited by inEnd
|
||||
if ( src >= srcEnd )
|
||||
break;
|
||||
}
|
||||
|
||||
return dstWritten;
|
||||
}
|
||||
|
||||
size_t
|
||||
wxMBConv::FromWChar(char *dst, size_t dstLen,
|
||||
const wchar_t *src, size_t srcLen) const
|
||||
{
|
||||
// the number of chars [which would be] written to dst [if it were not NULL]
|
||||
size_t dstWritten = 0;
|
||||
|
||||
// make a copy of the input string unless it is already properly
|
||||
// NUL-terminated
|
||||
//
|
||||
// if we don't know its length we have no choice but to assume that it is,
|
||||
// indeed, properly terminated
|
||||
wxWCharBuffer bufTmp;
|
||||
if ( srcLen == (size_t)-1 )
|
||||
{
|
||||
srcLen = wxWcslen(src) + 1;
|
||||
}
|
||||
else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
|
||||
{
|
||||
// make a copy in order to properly NUL-terminate the string
|
||||
bufTmp = wxWCharBuffer(srcLen);
|
||||
memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
|
||||
src = bufTmp;
|
||||
}
|
||||
|
||||
const size_t lenNul = GetMBNulLen();
|
||||
for ( const wchar_t * const srcEnd = src + srcLen;
|
||||
src < srcEnd;
|
||||
src += wxWcslen(src) + 1 /* skip L'\0' too */ )
|
||||
{
|
||||
// try to convert the current chunk
|
||||
size_t lenChunk = WC2MB(NULL, src, 0);
|
||||
|
||||
if ( lenChunk == wxCONV_FAILED )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
lenChunk += lenNul;
|
||||
dstWritten += lenChunk;
|
||||
|
||||
if ( dst )
|
||||
{
|
||||
if ( dstWritten > dstLen )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
dst += lenChunk;
|
||||
}
|
||||
}
|
||||
|
||||
return dstWritten;
|
||||
}
|
||||
|
||||
wxMBConv::~wxMBConv()
|
||||
{
|
||||
// nothing to do here (necessary for Darwin linking probably)
|
||||
@@ -157,217 +326,76 @@ const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
|
||||
if ( psz )
|
||||
{
|
||||
// calculate the length of the buffer needed first
|
||||
size_t nLen = MB2WC(NULL, psz, 0);
|
||||
if ( nLen != (size_t)-1 )
|
||||
const size_t nLen = MB2WC(NULL, psz, 0);
|
||||
if ( nLen != wxCONV_FAILED )
|
||||
{
|
||||
// now do the actual conversion
|
||||
wxWCharBuffer buf(nLen);
|
||||
nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
|
||||
if ( nLen != (size_t)-1 )
|
||||
{
|
||||
wxWCharBuffer buf(nLen /* +1 added implicitly */);
|
||||
|
||||
// +1 for the trailing NULL
|
||||
if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wxWCharBuffer buf((wchar_t *)NULL);
|
||||
|
||||
return buf;
|
||||
return wxWCharBuffer();
|
||||
}
|
||||
|
||||
const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
|
||||
{
|
||||
if ( pwz )
|
||||
{
|
||||
size_t nLen = WC2MB(NULL, pwz, 0);
|
||||
if ( nLen != (size_t)-1 )
|
||||
const size_t nLen = WC2MB(NULL, pwz, 0);
|
||||
if ( nLen != wxCONV_FAILED )
|
||||
{
|
||||
wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
|
||||
nLen = WC2MB(buf.data(), pwz, nLen + 4);
|
||||
if ( nLen != (size_t)-1 )
|
||||
{
|
||||
// extra space for trailing NUL(s)
|
||||
static const size_t extraLen = GetMaxMBNulLen();
|
||||
|
||||
wxCharBuffer buf(nLen + extraLen - 1);
|
||||
if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wxCharBuffer buf((char *)NULL);
|
||||
|
||||
return buf;
|
||||
return wxCharBuffer();
|
||||
}
|
||||
|
||||
const wxWCharBuffer
|
||||
wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
|
||||
{
|
||||
// the currently accumulated wide characters
|
||||
wxWCharBuffer wbuf;
|
||||
|
||||
// the current length of wbuf
|
||||
size_t lenBuf = 0;
|
||||
|
||||
// the number of NULs terminating this string
|
||||
size_t nulLen wxDUMMY_INITIALIZE(0);
|
||||
|
||||
// make a copy of the input string unless it is already properly
|
||||
// NUL-terminated
|
||||
wxCharBuffer bufTmp;
|
||||
|
||||
// if we were not given the input size we just have to assume that the
|
||||
// string is properly terminated as we have no way of knowing how long it
|
||||
// is anyhow, but if we do have the size check whether there are enough
|
||||
// NULs at the end
|
||||
if ( inLen != (size_t)-1 )
|
||||
const size_t dstLen = ToWChar(NULL, 0, in, inLen);
|
||||
if ( dstLen != wxCONV_FAILED )
|
||||
{
|
||||
// we need to know how to find the end of this string
|
||||
nulLen = GetMBNulLen();
|
||||
if ( nulLen == (size_t)-1 )
|
||||
wxWCharBuffer wbuf(dstLen);
|
||||
if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
|
||||
{
|
||||
if ( outLen )
|
||||
*outLen = dstLen;
|
||||
return wbuf;
|
||||
|
||||
// if there are enough NULs we can avoid the copy
|
||||
if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
|
||||
{
|
||||
// make a copy in order to properly NUL-terminate the string
|
||||
bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
|
||||
char * const p = bufTmp.data();
|
||||
memcpy(p, in, inLen);
|
||||
for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
|
||||
*s = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
if ( bufTmp )
|
||||
in = bufTmp;
|
||||
|
||||
size_t lenChunk;
|
||||
for ( const char * const inEnd = in + inLen;; )
|
||||
{
|
||||
// try to convert the current chunk
|
||||
lenChunk = MB2WC(NULL, in, 0);
|
||||
if ( lenChunk == 0 )
|
||||
{
|
||||
// nothing left in the input string, conversion succeeded
|
||||
break;
|
||||
}
|
||||
|
||||
if ( lenChunk == (size_t)-1 )
|
||||
break;
|
||||
|
||||
// if we already have a previous chunk, leave the NUL separating it
|
||||
// from this one
|
||||
if ( lenBuf )
|
||||
lenBuf++;
|
||||
|
||||
const size_t lenBufNew = lenBuf + lenChunk;
|
||||
if ( !wbuf.extend(lenBufNew) )
|
||||
{
|
||||
lenChunk = (size_t)-1;
|
||||
break;
|
||||
}
|
||||
|
||||
lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
|
||||
if ( lenChunk == (size_t)-1 )
|
||||
break;
|
||||
|
||||
lenBuf = lenBufNew;
|
||||
|
||||
if ( inLen == (size_t)-1 )
|
||||
{
|
||||
// convert only one chunk in this case, as we suppose that the
|
||||
// string is NUL-terminated and so inEnd is not used at all
|
||||
break;
|
||||
}
|
||||
|
||||
// advance the input pointer past the end of this chunk
|
||||
while ( NotAllNULs(in, nulLen) )
|
||||
{
|
||||
// notice that we must skip over multiple bytes here as we suppose
|
||||
// that if NUL takes 2 or 4 bytes, then all the other characters do
|
||||
// too and so if advanced by a single byte we might erroneously
|
||||
// detect sequences of NUL bytes in the middle of the input
|
||||
in += nulLen;
|
||||
}
|
||||
|
||||
in += nulLen; // skipping over its terminator as well
|
||||
|
||||
// note that ">=" (and not just "==") is needed here as the terminator
|
||||
// we skipped just above could be inside or just after the buffer
|
||||
// delimited by inEnd
|
||||
if ( in >= inEnd )
|
||||
break;
|
||||
}
|
||||
|
||||
if ( lenChunk == (size_t)-1 )
|
||||
{
|
||||
// conversion failed
|
||||
lenBuf = 0;
|
||||
wbuf.reset();
|
||||
}
|
||||
|
||||
if ( outLen )
|
||||
*outLen = lenBuf;
|
||||
*outLen = 0;
|
||||
|
||||
return wbuf;
|
||||
return wxWCharBuffer();
|
||||
}
|
||||
|
||||
const wxCharBuffer
|
||||
wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
|
||||
{
|
||||
// the currently accumulated multibyte characters
|
||||
wxCharBuffer buf;
|
||||
|
||||
// the current length of buf
|
||||
size_t lenBuf = 0;
|
||||
|
||||
// make a copy of the input string unless it is already properly
|
||||
// NUL-terminated
|
||||
//
|
||||
// if we don't know its length we have no choice but to assume that it is,
|
||||
// indeed, properly terminated
|
||||
wxWCharBuffer bufTmp;
|
||||
if ( inLen == (size_t)-1 )
|
||||
const size_t dstLen = FromWChar(NULL, 0, in, inLen);
|
||||
if ( dstLen != wxCONV_FAILED )
|
||||
{
|
||||
inLen = wxWcslen(in) + 1;
|
||||
}
|
||||
else if ( inLen != 0 && in[inLen - 1] != L'\0' )
|
||||
{
|
||||
// make a copy in order to properly NUL-terminate the string
|
||||
bufTmp = wxWCharBuffer(inLen);
|
||||
memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
|
||||
}
|
||||
|
||||
if ( bufTmp )
|
||||
in = bufTmp;
|
||||
|
||||
for ( const wchar_t * const inEnd = in + inLen;; )
|
||||
{
|
||||
// try to convert the current chunk, if anything left
|
||||
size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
|
||||
if ( lenChunk == 0 )
|
||||
wxCharBuffer buf(dstLen);
|
||||
if ( FromWChar(buf.data(), dstLen, in, inLen) )
|
||||
{
|
||||
// nothing left in the input string, conversion succeeded
|
||||
if ( outLen )
|
||||
*outLen = lenBuf ? lenBuf - 1 : lenBuf;
|
||||
|
||||
*outLen = dstLen;
|
||||
return buf;
|
||||
}
|
||||
|
||||
if ( lenChunk == (size_t)-1 )
|
||||
break;
|
||||
|
||||
const size_t lenBufNew = lenBuf + lenChunk;
|
||||
if ( !buf.extend(lenBufNew) )
|
||||
break;
|
||||
|
||||
lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
|
||||
if ( lenChunk == (size_t)-1 )
|
||||
break;
|
||||
|
||||
// chunk successfully converted, go to the next one
|
||||
in += wxWcslen(in) + 1 /* skip NUL too */;
|
||||
lenBuf = lenBufNew + 1;
|
||||
}
|
||||
|
||||
// conversion failed
|
||||
if ( outLen )
|
||||
*outLen = 0;
|
||||
|
||||
|
Reference in New Issue
Block a user