added new To/FromWChar() API with more reasonable semantics than old MB2WC/WC2MB; for now both coexist and the change is/should be backwards compatible

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38541 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2006-04-04 12:35:21 +00:00
parent 7ef3ab50e9
commit 483b0434bf
3 changed files with 320 additions and 184 deletions

View File

@@ -34,6 +34,14 @@ current locale.}
depends on the system.} depends on the system.}
\end{twocollist} \end{twocollist}
\wxheading{Constants}
\texttt{wxCONV\_FAILED} value is defined as \texttt{(size\_t)$-1$} and is
returned by the conversion functions instead of the length of the converted
string if the conversion fails.
\wxheading{Derived from} \wxheading{Derived from}
No base class No base class
@@ -48,6 +56,7 @@ No base class
\helpref{wxEncodingConverter}{wxencodingconverter}, \helpref{wxEncodingConverter}{wxencodingconverter},
\helpref{wxMBConv classes overview}{mbconvclasses} \helpref{wxMBConv classes overview}{mbconvclasses}
\latexignore{\rtfignore{\wxheading{Members}}} \latexignore{\rtfignore{\wxheading{Members}}}
@@ -55,12 +64,15 @@ No base class
\func{}{wxMBConv}{\void} \func{}{wxMBConv}{\void}
Constructor. Trivial default constructor.
\membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc} \membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc}
\constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}} \constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}}
\deprecated{\helpref{ToWChar}{wxmbconvtowchar}}
Converts from a string \arg{in} in multibyte encoding to Unicode putting up to Converts from a string \arg{in} in multibyte encoding to Unicode putting up to
\arg{outLen} characters into the buffer \arg{out}. \arg{outLen} characters into the buffer \arg{out}.
@@ -89,6 +101,8 @@ The length of the converted string \emph{excluding} the trailing \NUL.
\constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}} \constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}}
\deprecated{\helpref{FromWChar}{wxmbconvfromwchar}}
Converts from Unicode to multibyte encoding. The semantics of this function Converts from Unicode to multibyte encoding. The semantics of this function
(including the return value meaning) is the same as for (including the return value meaning) is the same as for
\helpref{MB2WC}{wxmbconvmb2wc}. \helpref{MB2WC}{wxmbconvmb2wc}.
@@ -191,6 +205,45 @@ result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
return type (without const). return type (without const).
\membersection{wxMBConv::FromWChar}\label{wxmbconvfromwchar}
\constfunc{virtual size\_t}{FromWChar}{\param{char *}{dst}, \param{size\_t }{dstLen}, \param{const wchar\_t *}{src}, \param{size\_t }{srcLen = $-1$}}
The most general function for converting a wide string to a multibyte string.
The main case is when \arg{dst} is not \NULL and \arg{srcLen} is not $-1$: then
the function converts exactly \arg{srcLen} wide characters starting at \arg{src}
into a multibyte string which it outputs to \arg{dst}. If the length of the
resulting multibyte string is greater than \arg{dstLen}, an error is returned.
Note that if the \arg{srcLen} wide characters don't include \NUL characters, the
resulting multibyte string is not \NUL-terminated either.
If \arg{srcLen} is $-1$, the function supposes that the wide string is properly
\NUL-terminated and converts the entire string, including the trailing \NUL.
In this case the multibyte string is also \NUL-terminated (as necessary for the
encoding handled by this conversion).
Finally, if \arg{dst} is \NULL, the function returns the length of the needed
buffer.
\wxheading{Return value}
The number of characters written to \arg{dst} (or the number of characters
which would have been written to it if it were non-\NULL) on success or
\texttt{wxCONV\_FAILED} on error.
\membersection{wxMBConv::GetMaxMBNulLen}\label{wxmbconvgetmaxmbnullen}
\func{static size\_t}{GetMaxMBNulLen}{\void}
Returns the maximal value which can be returned by
\helpref{GetMBNulLen}{wxmbconvgetmbnullen} for any conversion object. Currently
this value is $4$.
This method can be used to allocate the buffer with enough space for the
trailing \NUL characters for any encoding.
\membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen} \membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen}
\constfunc{size\_t}{GetMBNulLen}{\void} \constfunc{size\_t}{GetMBNulLen}{\void}
@@ -201,3 +254,11 @@ which the string is terminated with $2$ and $4$ \NUL characters respectively.
The other cases are not currently supported and $-1$ is returned for them. The other cases are not currently supported and $-1$ is returned for them.
\membersection{wxMBConv::ToWChar}\label{wxmbconvtowchar}
\constfunc{virtual size\_t}{ToWChar}{\param{wchar\_t *}{dst}, \param{size\_t }{dstLen}, \param{const char *}{src}, \param{size\_t }{srcLen = $-1$}}
This function has the same semantics as \helpref{FromWChar}{wxmbconvfromwchar}
except that it converts a multibyte string to a wide one.

View File

@@ -28,6 +28,9 @@
#if wxUSE_WCHAR_T #if wxUSE_WCHAR_T
// the error value returned by wxMBConv methods
#define wxCONV_FAILED ((size_t)-1)
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// wxMBConv (abstract base class for conversions) // wxMBConv (abstract base class for conversions)
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@@ -35,24 +38,43 @@
class WXDLLIMPEXP_BASE wxMBConv class WXDLLIMPEXP_BASE wxMBConv
{ {
public: public:
// The functions doing actual conversion. On success, the return value is // The functions doing actual conversion from/to narrow to/from wide
// the length (i.e. the number of characters, not bytes, and not counting // character strings.
// the trailing L'\0') of the converted string. On failure, (size_t)-1 is
// returned. In the special case when outputBuf is NULL the return value is
// the same one but nothing is written to the buffer.
// //
// Note that outLen is the length of the output buffer, not the length of // On success, the return value is the length (i.e. the number of
// the input (which is always supposed to be terminated by one or more // characters, not bytes) of the converted string including any trailing
// NULs, as appropriate for the encoding)! // L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if
virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0; // there is not enough space for everything, including the trailing NUL
virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0; // character(s), in the output buffer, (size_t)-1 is returned.
//
// In the special case when dstLen is 0 (dst may be NULL then) the
// return value is the length of the needed buffer but nothing happens
// otherwise. If srcLen is -1, the entire string, up to and including the
// trailing NUL(s), is converted, otherwise exactly srcLen bytes are.
//
// Typical usage:
//
// size_t dstLen = conv.ToWChar(NULL, 0, src);
// if ( dstLen == wxCONV_FAILED )
// ... handle error ...
// wchar_t *wbuf = new wchar_t[dstLen];
// conv.ToWChar(wbuf, dstLen, src);
//
virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen = -1) const;
// MB <-> WC virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = -1) const;
// Convenience functions for translating NUL-terminated strings: returns
// the buffer containing the converted string or NULL pointer if the
// conversion failed.
const wxWCharBuffer cMB2WC(const char *in) const; const wxWCharBuffer cMB2WC(const char *in) const;
const wxCharBuffer cWC2MB(const wchar_t *in) const; const wxCharBuffer cWC2MB(const wchar_t *in) const;
// Functions converting strings which may contain embedded NULs and don't // Convenience functions for converting strings which may contain embedded
// have to be NUL-terminated. // NULs and don't have to be NUL-terminated.
// //
// inLen is the length of the buffer including trailing NUL if any: if the // inLen is the length of the buffer including trailing NUL if any: if the
// last 4 bytes of the buffer are all NULs, these functions are more // last 4 bytes of the buffer are all NULs, these functions are more
@@ -94,6 +116,31 @@ public:
// anything else is not supported currently and -1 should be returned // anything else is not supported currently and -1 should be returned
virtual size_t GetMBNulLen() const { return 1; } virtual size_t GetMBNulLen() const { return 1; }
// return the maximal value currently returned by GetMBNulLen() for any
// encoding
static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; }
// The old conversion functions. The existing classes currently mostly
// implement these ones but we're in transition to using To/FromWChar()
// instead and any new classes should implement just the new functions.
// For now, however, we provide default implementation of To/FromWChar() in
// this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all
// the conversions at once.
//
// On success, the return value is the length (i.e. the number of
// characters, not bytes) not counting the trailing NUL(s) of the converted
// string. On failure, (size_t)-1 is returned. In the special case when
// outputBuf is NULL the return value is the same one but nothing is
// written to the buffer.
//
// Note that outLen is the length of the output buffer, not the length of
// the input (which is always supposed to be terminated by one or more
// NULs, as appropriate for the encoding)!
virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0;
virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0;
// virtual dtor for any base class // virtual dtor for any base class
virtual ~wxMBConv(); virtual ~wxMBConv();
}; };

View File

@@ -147,6 +147,175 @@ static size_t decode_utf16(const wxUint16* input, wxUint32& output)
// wxMBConv // wxMBConv
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Default implementation of the multibyte to wide character conversion,
// written in terms of the old MB2WC().
//
// If dst is NULL nothing is written and only the output length is computed,
// otherwise wxCONV_FAILED is returned if more than dstLen characters would be
// needed. If srcLen is (size_t)-1, src is supposed to be NUL-terminated and a
// single chunk is converted, otherwise the srcLen bytes starting at src are
// converted chunk by chunk, the chunks being separated by NULs.
//
// Returns the number of wide characters [which would be] written to dst,
// counting the NULs separating the chunks but not the one following the last
// chunk, or wxCONV_FAILED on error.
size_t
wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen) const
{
// although new conversion classes are supposed to implement this function
// directly, the existing ones only implement the old MB2WC() and so, to
// avoid having to rewrite all conversion classes at once, we provide a
// default (but not efficient) implementation of this one in terms of the
// old function by copying the input to ensure that it's NUL-terminated and
// then using MB2WC() to convert it
// the number of chars [which would be] written to dst [if it were not NULL]
size_t dstWritten = 0;
// the number of NULs terminating this string (only set and used when the
// input length is known)
size_t nulLen wxDUMMY_INITIALIZE(0);
// if we were not given the input size we just have to assume that the
// string is properly terminated as we have no way of knowing how long it
// is anyhow, but if we do have the size check whether there are enough
// NULs at the end
wxCharBuffer bufTmp;
const char *srcEnd;
if ( srcLen != (size_t)-1 )
{
// we need to know how to find the end of this string
nulLen = GetMBNulLen();
if ( nulLen == wxCONV_FAILED )
return wxCONV_FAILED;
// if there are enough NULs we can avoid the copy
if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
{
// make a copy in order to properly NUL-terminate the string: the
// srcLen input bytes are followed by nulLen NUL bytes
bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
char * const p = bufTmp.data();
memcpy(p, src, srcLen);
for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
*s = '\0';
src = bufTmp;
}
srcEnd = src + srcLen;
}
else // quit after the first loop iteration
{
srcEnd = NULL;
}
// each iteration converts one NUL-terminated chunk of the input
for ( ;; )
{
// try to convert the current chunk: this first call only computes the
// length of the result
size_t lenChunk = MB2WC(NULL, src, 0);
if ( lenChunk == 0 )
{
// nothing left in the input string, conversion succeeded
//
// NOTE(review): this also stops at an empty chunk in the middle of
// the input (i.e. at two consecutive terminators) -- confirm that
// this is intended
break;
}
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
// if we already have a previous chunk, leave the NUL separating it
// from this one
if ( dstWritten )
{
dstWritten++;
if ( dst )
dst++;
}
dstWritten += lenChunk;
if ( dst )
{
if ( dstWritten > dstLen )
return wxCONV_FAILED;
// NOTE(review): MB2WC() is allowed to write lenChunk + 1 characters
// here while only dstWritten <= dstLen was checked above, so this
// looks like it relies on dst having room for one more character
// than dstLen -- confirm against the callers
lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
dst += lenChunk;
}
if ( !srcEnd )
{
// we convert the entire string in this case, as we suppose that the
// string is NUL-terminated and so srcEnd is not used at all
break;
}
// advance the input pointer past the end of this chunk
while ( NotAllNULs(src, nulLen) )
{
// notice that we must skip over multiple bytes here as we suppose
// that if NUL takes 2 or 4 bytes, then all the other characters do
// too and so if advanced by a single byte we might erroneously
// detect sequences of NUL bytes in the middle of the input
src += nulLen;
}
src += nulLen; // skipping over its terminator as well
// note that ">=" (and not just "==") is needed here as the terminator
// we skipped just above could be inside or just after the buffer
// delimited by srcEnd
if ( src >= srcEnd )
break;
}
return dstWritten;
}
// Default implementation of the wide character to multibyte conversion,
// written in terms of the old WC2MB().
//
// The input is converted chunk by chunk, a chunk being a run of wide
// characters ending with L'\0'. If srcLen is (size_t)-1, src is supposed to
// be NUL-terminated and its length is computed here, otherwise exactly srcLen
// wide characters are converted.
//
// If dst is NULL nothing is written and only the output length is computed,
// otherwise wxCONV_FAILED is returned if more than dstLen chars would be
// needed.
//
// Returns the number of chars [which would be] written to dst, counting the
// terminating NUL(s) of every chunk, or wxCONV_FAILED if the conversion fails
// or if the length of the NUL terminator of this encoding is not known.
size_t
wxMBConv::FromWChar(char *dst, size_t dstLen,
                    const wchar_t *src, size_t srcLen) const
{
    // we need to know how many NUL bytes terminate each converted chunk: as
    // in ToWChar(), give up if GetMBNulLen() doesn't know it because adding
    // (size_t)-1 to the lengths below would silently corrupt them
    const size_t lenNul = GetMBNulLen();
    if ( lenNul == wxCONV_FAILED )
        return wxCONV_FAILED;

    // the number of chars [which would be] written to dst [if it were not NULL]
    size_t dstWritten = 0;

    // make a copy of the input string unless it is already properly
    // NUL-terminated
    //
    // if we don't know its length we have no choice but to assume that it is,
    // indeed, properly terminated
    wxWCharBuffer bufTmp;
    if ( srcLen == (size_t)-1 )
    {
        srcLen = wxWcslen(src) + 1;
    }
    else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
    {
        // make a copy in order to properly NUL-terminate the string
        bufTmp = wxWCharBuffer(srcLen /* +1 added implicitly */);
        wchar_t * const copy = bufTmp.data();
        memcpy(copy, src, srcLen*sizeof(wchar_t));

        // terminate the copy explicitly instead of relying on the buffer
        // being zero-initialized
        copy[srcLen] = L'\0';

        src = bufTmp;
    }

    for ( const wchar_t * const srcEnd = src + srcLen;
          src < srcEnd;
          src += wxWcslen(src) + 1 /* skip L'\0' too */ )
    {
        // try to convert the current chunk: this first call only computes the
        // length of the result, not counting its terminator
        size_t lenChunk = WC2MB(NULL, src, 0);
        if ( lenChunk == wxCONV_FAILED )
            return wxCONV_FAILED;

        // account for the NUL(s) terminating this chunk in the output
        lenChunk += lenNul;
        dstWritten += lenChunk;

        if ( dst )
        {
            // the chunk and its terminator must fit in the remaining space
            if ( dstWritten > dstLen )
                return wxCONV_FAILED;

            if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
                return wxCONV_FAILED;

            dst += lenChunk;
        }
    }

    return dstWritten;
}
wxMBConv::~wxMBConv() wxMBConv::~wxMBConv()
{ {
// nothing to do here (necessary for Darwin linking probably) // nothing to do here (necessary for Darwin linking probably)
@@ -157,217 +326,76 @@ const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
if ( psz ) if ( psz )
{ {
// calculate the length of the buffer needed first // calculate the length of the buffer needed first
size_t nLen = MB2WC(NULL, psz, 0); const size_t nLen = MB2WC(NULL, psz, 0);
if ( nLen != (size_t)-1 ) if ( nLen != wxCONV_FAILED )
{ {
// now do the actual conversion // now do the actual conversion
wxWCharBuffer buf(nLen); wxWCharBuffer buf(nLen /* +1 added implicitly */);
nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
if ( nLen != (size_t)-1 ) // +1 for the trailing NULL
{ if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
return buf; return buf;
} }
} }
}
wxWCharBuffer buf((wchar_t *)NULL); return wxWCharBuffer();
return buf;
} }
const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
{ {
if ( pwz ) if ( pwz )
{ {
size_t nLen = WC2MB(NULL, pwz, 0); const size_t nLen = WC2MB(NULL, pwz, 0);
if ( nLen != (size_t)-1 ) if ( nLen != wxCONV_FAILED )
{
wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
nLen = WC2MB(buf.data(), pwz, nLen + 4);
if ( nLen != (size_t)-1 )
{ {
// extra space for trailing NUL(s)
static const size_t extraLen = GetMaxMBNulLen();
wxCharBuffer buf(nLen + extraLen - 1);
if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
return buf; return buf;
} }
} }
}
wxCharBuffer buf((char *)NULL); return wxCharBuffer();
return buf;
} }
const wxWCharBuffer const wxWCharBuffer
wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
{ {
// the currently accumulated wide characters const size_t dstLen = ToWChar(NULL, 0, in, inLen);
wxWCharBuffer wbuf; if ( dstLen != wxCONV_FAILED )
// the current length of wbuf
size_t lenBuf = 0;
// the number of NULs terminating this string
size_t nulLen wxDUMMY_INITIALIZE(0);
// make a copy of the input string unless it is already properly
// NUL-terminated
wxCharBuffer bufTmp;
// if we were not given the input size we just have to assume that the
// string is properly terminated as we have no way of knowing how long it
// is anyhow, but if we do have the size check whether there are enough
// NULs at the end
if ( inLen != (size_t)-1 )
{ {
// we need to know how to find the end of this string wxWCharBuffer wbuf(dstLen);
nulLen = GetMBNulLen(); if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
if ( nulLen == (size_t)-1 ) {
if ( outLen )
*outLen = dstLen;
return wbuf; return wbuf;
// if there are enough NULs we can avoid the copy
if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
{
// make a copy in order to properly NUL-terminate the string
bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
char * const p = bufTmp.data();
memcpy(p, in, inLen);
for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
*s = '\0';
} }
} }
if ( bufTmp )
in = bufTmp;
size_t lenChunk;
for ( const char * const inEnd = in + inLen;; )
{
// try to convert the current chunk
lenChunk = MB2WC(NULL, in, 0);
if ( lenChunk == 0 )
{
// nothing left in the input string, conversion succeeded
break;
}
if ( lenChunk == (size_t)-1 )
break;
// if we already have a previous chunk, leave the NUL separating it
// from this one
if ( lenBuf )
lenBuf++;
const size_t lenBufNew = lenBuf + lenChunk;
if ( !wbuf.extend(lenBufNew) )
{
lenChunk = (size_t)-1;
break;
}
lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
if ( lenChunk == (size_t)-1 )
break;
lenBuf = lenBufNew;
if ( inLen == (size_t)-1 )
{
// convert only one chunk in this case, as we suppose that the
// string is NUL-terminated and so inEnd is not used at all
break;
}
// advance the input pointer past the end of this chunk
while ( NotAllNULs(in, nulLen) )
{
// notice that we must skip over multiple bytes here as we suppose
// that if NUL takes 2 or 4 bytes, then all the other characters do
// too and so if advanced by a single byte we might erroneously
// detect sequences of NUL bytes in the middle of the input
in += nulLen;
}
in += nulLen; // skipping over its terminator as well
// note that ">=" (and not just "==") is needed here as the terminator
// we skipped just above could be inside or just after the buffer
// delimited by inEnd
if ( in >= inEnd )
break;
}
if ( lenChunk == (size_t)-1 )
{
// conversion failed
lenBuf = 0;
wbuf.reset();
}
if ( outLen ) if ( outLen )
*outLen = lenBuf; *outLen = 0;
return wbuf; return wxWCharBuffer();
} }
const wxCharBuffer const wxCharBuffer
wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
{ {
// the currently accumulated multibyte characters const size_t dstLen = FromWChar(NULL, 0, in, inLen);
wxCharBuffer buf; if ( dstLen != wxCONV_FAILED )
// the current length of buf
size_t lenBuf = 0;
// make a copy of the input string unless it is already properly
// NUL-terminated
//
// if we don't know its length we have no choice but to assume that it is,
// indeed, properly terminated
wxWCharBuffer bufTmp;
if ( inLen == (size_t)-1 )
{ {
inLen = wxWcslen(in) + 1; wxCharBuffer buf(dstLen);
} if ( FromWChar(buf.data(), dstLen, in, inLen) )
else if ( inLen != 0 && in[inLen - 1] != L'\0' )
{ {
// make a copy in order to properly NUL-terminate the string
bufTmp = wxWCharBuffer(inLen);
memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
}
if ( bufTmp )
in = bufTmp;
for ( const wchar_t * const inEnd = in + inLen;; )
{
// try to convert the current chunk, if anything left
size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
if ( lenChunk == 0 )
{
// nothing left in the input string, conversion succeeded
if ( outLen ) if ( outLen )
*outLen = lenBuf ? lenBuf - 1 : lenBuf; *outLen = dstLen;
return buf; return buf;
} }
if ( lenChunk == (size_t)-1 )
break;
const size_t lenBufNew = lenBuf + lenChunk;
if ( !buf.extend(lenBufNew) )
break;
lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
if ( lenChunk == (size_t)-1 )
break;
// chunk successfully converted, go to the next one
in += wxWcslen(in) + 1 /* skip NUL too */;
lenBuf = lenBufNew + 1;
} }
// conversion failed
if ( outLen ) if ( outLen )
*outLen = 0; *outLen = 0;