Fix wxMBConv_cf to implement FromWChar/ToWChar in lieu of now deprecated WC2MB/MB2WC.

This is a complete rewrite trying built-in UTF-32 conversion first, then falling back to conversion through UTF-16. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@47206 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2007-07-06 22:38:27 +00:00
parent e2badebb8f
commit 6ff49cbcd4
1 changed files with 159 additions and 84 deletions
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -59,6 +59,8 @@
 #ifdef __DARWIN__
 #include <CoreFoundation/CFString.h>
 #include <CoreFoundation/CFStringEncodingExt.h>
 #include "wx/mac/corefoundation/cfref.h"
 #endif //def __DARWIN__
 #ifdef __WXMAC__
@@ -2307,10 +2309,6 @@ private:
 #ifdef __DARWIN__
 // RN: There is no UTF-32 support in either Core Foundation or Cocoa.
 // Strangely enough, Core Foundation uses UTF-32 internally
 // quite a bit - it's just not public (yet).
 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
 {
    CFStringEncoding enc = kCFStringEncodingInvalidId ;
@@ -2439,9 +2437,12 @@ CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
        case wxFONTENCODING_EUC_JP :
            enc = kCFStringEncodingEUC_JP;
            break ;
 /* Don't support conversion to/from UTF16 as wxWidgets can do this better.
 * In particular, ToWChar would fail miserably using strlen on an input UTF16.
        case wxFONTENCODING_UTF16 :
            enc = kCFStringEncodingUnicode ;
            break ;
 */
        case wxFONTENCODING_MACROMAN :
            enc = kCFStringEncodingMacRoman ;
            break ;
@@ -2571,6 +2572,16 @@ CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
    return enc ;
 }
 #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
 // Provide a constant for the wchar_t encoding used by the host platform.
 #ifdef WORDS_BIGENDIAN
    static const CFStringEncoding wxCFStringEncodingWcharT = kCFStringEncodingUTF32BE;
 #else
    static const CFStringEncoding wxCFStringEncodingWcharT = kCFStringEncodingUTF32LE;
 #endif
 #endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 */
 class wxMBConv_cf : public wxMBConv
 {
 public:
@@ -2605,105 +2616,167 @@ public:
        m_encoding = encoding ;
    }
-    size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
+    virtual size_t ToWChar(wchar_t * dst, size_t dstSize, const char * src, size_t srcSize = wxNO_LEN) const
    {
-        wxASSERT(szUnConv);
+        wxCHECK(src, wxCONV_FAILED);
-        CFStringRef theString = CFStringCreateWithBytes (
+        /* NOTE: This is wrong if the source encoding has an element size
         * other than char (e.g. it's kCFStringEncodingUnicode)
         * If the user specifies it, it's presumably right though.
         * Right now we don't support UTF-16 in any way since wx can do a better job.
         */
        if(srcSize == wxNO_LEN)
            srcSize = strlen(src) + 1;
        // First create the temporary CFString
        wxCFRef<CFStringRef> theString( CFStringCreateWithBytes (
                                                NULL, //the allocator
-                                                (const UInt8*)szUnConv,
+                                                (const UInt8*)src,
-                                                strlen(szUnConv),
+                                                srcSize,
                                                m_encoding,
                                                false //no BOM/external representation
-                                                );
+                                                ));
-        wxASSERT(theString);
+        wxCHECK(theString != NULL, wxCONV_FAILED);
-        size_t nOutLength = CFStringGetLength(theString);
+        /* NOTE: The string content includes the NULL element if the source string did
         * That means we have to do nothing special because the destination will have
         * the NULL element iff the source did and the NULL element will be included
         * in the count iff it was included in the source count.
         */
-        if (szOut == NULL)
+
 /* If we're compiling against Tiger headers we can support direct conversion
 * to UTF32.  If we are then run against a pre-Tiger system, the encoding
 * won't be available so we'll defer to the string->UTF-16->UTF-32 conversion.
 */
 #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
        if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
        {
-            CFRelease(theString);
+            CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
-            return nOutLength;
+            CFIndex usedBufLen;
        }
-        CFRange theRange = { 0, nOutSize };
+            CFIndex charsConverted = CFStringGetBytes(
                    theString,
                    fullStringRange,
                    wxCFStringEncodingWcharT,
                    0,
                    false,
                    // if dstSize is 0 then pass NULL to get required length in usedBufLen
                    dstSize != 0?(UInt8*)dst:NULL,
                    dstSize * sizeof(wchar_t),
                    &usedBufLen);
-#if SIZEOF_WCHAR_T == 4
+            // charsConverted is > 0 iff conversion succeeded
-        UniChar* szUniCharBuffer = new UniChar[nOutSize];
+            if(charsConverted <= 0)
-#endif
+                return wxCONV_FAILED;
-        CFStringGetCharacters(theString, theRange, szUniCharBuffer);
+            /* usedBufLen is the number of bytes written, so we divide by
             * sizeof(wchar_t) to get the number of elements written.
             */
            wxASSERT( (usedBufLen % sizeof(wchar_t)) == 0 );
-        CFRelease(theString);
+            // CFStringGetBytes does exactly the right thing when buffer
-
+            // pointer is NULL and returns the number of bytes required
-        szUniCharBuffer[nOutLength] = '\0';
+            return usedBufLen / sizeof(wchar_t);
 #if SIZEOF_WCHAR_T == 4
        wxMBConvUTF16 converter;
        converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
        delete [] szUniCharBuffer;
 #endif
        return nOutLength;
    }
    size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
    {
        wxASSERT(szUnConv);
        size_t nRealOutSize;
        size_t nBufSize = wxWcslen(szUnConv);
        UniChar* szUniBuffer = (UniChar*) szUnConv;
 #if SIZEOF_WCHAR_T == 4
        wxMBConvUTF16 converter ;
        nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
        szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
        converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
        nBufSize /= sizeof(UniChar);
 #endif
        CFStringRef theString = CFStringCreateWithCharactersNoCopy(
                                NULL, //allocator
                                szUniBuffer,
                                nBufSize,
                                kCFAllocatorNull //deallocator - we want to deallocate it ourselves
                            );
        wxASSERT(theString);
        //Note that CER puts a BOM when converting to unicode
        //so we  check and use getchars instead in that case
        if (m_encoding == kCFStringEncodingUnicode)
        {
            if (szOut != NULL)
                CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
            nRealOutSize = CFStringGetLength(theString) + 1;
        }
        else
 #endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 */
        {
-            CFStringGetBytes(
+            // NOTE: Includes NULL iff source did
            /* NOTE: This is an approximation.  The eventual UTF-32 will
             * possibly have fewer elements but certainly not more.
             */
            size_t returnSize = CFStringGetLength(theString);
            if (dstSize == 0 || dst == NULL)
            {
                return returnSize;
            }
            // Convert the entire string; it is too hard to figure out how many UTF-16
            // code units we'd need for an undersized UTF-32 destination buffer.
            CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
            UniChar *szUniCharBuffer = new UniChar[fullStringRange.length];
            CFStringGetCharacters(theString, fullStringRange, szUniCharBuffer);
            wxMBConvUTF16 converter;
            returnSize = converter.ToWChar( dst, dstSize, (const char*)szUniCharBuffer, fullStringRange.length );
            delete [] szUniCharBuffer;
            return returnSize;
        }
        // NOTREACHED
    }
    virtual size_t FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcSize) const
    {
        wxCHECK(src, wxCONV_FAILED);
        if(srcSize == wxNO_LEN)
            srcSize = wxStrlen(src) + 1;
        // Temporary CFString
        wxCFRef<CFStringRef> theString;
 /* If we're compiling against Tiger headers we can support direct conversion
 * from UTF32.  If we are then run against a pre-Tiger system, the encoding
 * won't be available so we'll defer to the UTF-32->UTF-16->string conversion.
 */
 #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
        if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
        {
            theString = wxCFRef<CFStringRef>(CFStringCreateWithBytes(
                    kCFAllocatorDefault,
                    (UInt8*)src,
                    srcSize * sizeof(wchar_t),
                    wxCFStringEncodingWcharT,
                    false));
        }
        else
 #endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 */
        {
            wxMBConvUTF16 converter;
            size_t cbUniBuffer = converter.FromWChar( NULL, 0, src, srcSize );
            wxASSERT(cbUniBuffer % sizeof(UniChar));
            // Will be free'd by kCFAllocatorMalloc when CFString is released
            UniChar *tmpUniBuffer = (UniChar*)malloc(cbUniBuffer);
            cbUniBuffer = converter.FromWChar( (char*) tmpUniBuffer, cbUniBuffer, src, srcSize );
            wxASSERT(cbUniBuffer % sizeof(UniChar));
            theString = wxCFRef<CFStringRef>(CFStringCreateWithCharactersNoCopy(
                        kCFAllocatorDefault,
                        tmpUniBuffer,
                        cbUniBuffer / sizeof(UniChar),
                        kCFAllocatorMalloc
                    ));
        }
        wxCHECK(theString != NULL, wxCONV_FAILED);
        CFIndex usedBufLen;
        CFIndex charsConverted = CFStringGetBytes(
                theString, 
                CFRangeMake(0, CFStringGetLength(theString)),
                m_encoding,
-                0, //what to put in characters that can't be converted -
+                0, // FAIL on unconvertible characters
-                    //0 tells CFString to return NULL if it meets such a character
+                false, // not an external representation
-                false, //not an external representation
+                // if dstSize is 0 then pass NULL to get required length in usedBufLen
-                (UInt8*) szOut,
+                (dstSize != 0)?(UInt8*)dst:NULL,
-                nOutSize,
+                dstSize,
-                (CFIndex*) &nRealOutSize
+                &usedBufLen
            );
        }
-        CFRelease(theString);
+        // charsConverted is > 0 iff conversion succeeded
        if(charsConverted <= 0)
            return wxCONV_FAILED;
-#if SIZEOF_WCHAR_T == 4
+        return usedBufLen;
        delete[] szUniBuffer;
 #endif
        return  nRealOutSize - 1;
    }
    virtual wxMBConv *Clone() const { return new wxMBConv_cf(*this); }
@@ -3421,7 +3494,9 @@ wxMBConv *wxCSConv::DoCreate() const
 #ifdef __DARWIN__
    {
-        if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
+        // leave UTF16 and UTF32 to the built-ins of wx
        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
            ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
        {
 #if wxUSE_FONTMAP
            wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)