Produce NFD in wxConvFileName::FromWChar() on macOS

This ensures that fn_str() returns the string in the expected decomposed form (NFD). Also simplify the code by removing workarounds for old systems which are no longer supported, and make explicit the fact that under macOS ToWChar() has always produced NFC.
2020-07-13 12:05:29 +02:00
parent 47cc0b8946
commit ab858b5805
3 changed files with 42 additions and 75 deletions
--- a/include/wx/osx/core/private/strconv_cf.h
+++ b/include/wx/osx/core/private/strconv_cf.h
@@ -290,35 +290,44 @@ inline CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
 class wxMBConv_cf : public wxMBConv
 {
 public:
+    enum NormalizationForm
+    {
+        None = 0x00,
+        FromWChar_D = 0x01,
+        ToWChar_C = 0x02
+    };
+
    wxMBConv_cf()
    {
-        Init(CFStringGetSystemEncoding()) ;
+        Init(CFStringGetSystemEncoding(), ToWChar_C) ;
    }

    wxMBConv_cf(const wxMBConv_cf& conv) : wxMBConv()
    {
        m_encoding = conv.m_encoding;
+        m_normalization = conv.m_normalization;
    }

 #if wxUSE_FONTMAP
-    wxMBConv_cf(const char* name)
+    wxMBConv_cf(const char* name, NormalizationForm normalization = ToWChar_C)
    {
-        Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
+        Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) , normalization) ;
    }
 #endif

-    wxMBConv_cf(wxFontEncoding encoding)
+    wxMBConv_cf(wxFontEncoding encoding, NormalizationForm normalization = ToWChar_C )
    {
-        Init( wxCFStringEncFromFontEnc(encoding) );
+        Init( wxCFStringEncFromFontEnc(encoding) , normalization);
    }

    virtual ~wxMBConv_cf()
    {
    }

-    void Init( CFStringEncoding encoding)
+    void Init( CFStringEncoding encoding, NormalizationForm normalization )
    {
        m_encoding = encoding ;
+        m_normalization = normalization;
    }

    virtual size_t ToWChar(wchar_t * dst, size_t dstSize, const char * src, size_t srcSize = wxNO_LEN) const wxOVERRIDE;
@@ -333,9 +342,19 @@ public:
    }

 private:
+    NormalizationForm m_normalization ;
    CFStringEncoding m_encoding ;
 };

+// "Decomposing" converter used as wxConvFileName in wxOSX: FromWChar() emits NFD, ToWChar() NFC.
+class wxMBConvD_cf : public wxMBConv_cf
+{
+public:
+    wxMBConvD_cf(wxFontEncoding encoding) : wxMBConv_cf(encoding, (NormalizationForm) (ToWChar_C | FromWChar_D) )
+    {
+    }
+};
+
 // corresponding class for holding UniChars (native unicode characters)

 class WXDLLIMPEXP_BASE wxMacUniCharBuffer
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -3307,7 +3307,7 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();
 // It is important to use this conversion object under Darwin as it ensures
 // that Unicode strings are (re)composed correctly even though xnu kernel uses
 // decomposed form internally (at least for the file names).
-static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
+static wxMBConvD_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
 #endif

 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
--- a/src/osx/core/strconv_cf.cpp
+++ b/src/osx/core/strconv_cf.cpp
@@ -89,13 +89,16 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding)
        if ( theString == NULL )
            return wxCONV_FAILED;

-        // Ensure that the string is in canonical composed form (NFC): this is
-        // important because Darwin uses decomposed form (NFD) for e.g. file
-        // names but we want to use NFC internally.
-        wxCFRef<CFMutableStringRef>
+        if ( m_normalization & ToWChar_C )
+        {
+            // Ensure that the string is in canonical composed form (NFC): this is
+            // important because Darwin uses decomposed form (NFD) for e.g. file
+            // names but we want to use NFC internally.
+            wxCFRef<CFMutableStringRef>
            cfMutableString(CFStringCreateMutableCopy(NULL, 0, theString));
-        CFStringNormalize(cfMutableString, kCFStringNormalizationFormC);
-        theString = cfMutableString;
+            CFStringNormalize(cfMutableString, kCFStringNormalizationFormC);
+            theString = cfMutableString;
+        }

        /* NOTE: The string content includes the NULL element if the source string did
         * That means we have to do nothing special because the destination will have
@@ -103,13 +106,6 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding)
         * in the count iff it was included in the source count.
         */

-
-/* If we're compiling against Tiger headers we can support direct conversion
- * to UTF32.  If we are then run against a pre-Tiger system, the encoding
- * won't be available so we'll defer to the string->UTF-16->UTF-32 conversion.
- */
-        if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
-        {
            CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
            CFIndex usedBufLen;

@@ -135,34 +131,7 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding)
            // CFStringGetBytes does exactly the right thing when buffer
            // pointer is NULL and returns the number of bytes required
            return usedBufLen / sizeof(wchar_t);
-        }
-        else
-        {
-            // NOTE: Includes NULL iff source did
-            /* NOTE: This is an approximation.  The eventual UTF-32 will
-             * possibly have less elements but certainly not more.
-             */
-            size_t returnSize = CFStringGetLength(theString);

-            if (dstSize == 0 || dst == NULL)
-            {
-                return returnSize;
-            }
-
-            // Convert the entire string.. too hard to figure out how many UTF-16 we'd need
-            // for an undersized UTF-32 destination buffer.
-            CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
-            UniChar *szUniCharBuffer = new UniChar[fullStringRange.length];
-
-            CFStringGetCharacters(theString, fullStringRange, szUniCharBuffer);
-
-            wxMBConvUTF16 converter;
-            returnSize = converter.ToWChar( dst, dstSize, (const char*)szUniCharBuffer, fullStringRange.length );
-            delete [] szUniCharBuffer;
-
-            return returnSize;
-        }
-        // NOTREACHED
    }

    size_t wxMBConv_cf::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcSize) const
@@ -175,44 +144,23 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding)
        // Temporary CFString
        wxCFRef<CFStringRef> theString;

-/* If we're compiling against Tiger headers we can support direct conversion
- * from UTF32.  If we are then run against a pre-Tiger system, the encoding
- * won't be available so we'll defer to the UTF-32->UTF-16->string conversion.
- */
-        if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
-        {
            theString = wxCFRef<CFStringRef>(CFStringCreateWithBytes(
                    kCFAllocatorDefault,
                    reinterpret_cast<const UInt8*>(src),
                    srcSize * sizeof(wchar_t),
                    wxCFStringEncodingWcharT,
                    false));
-        }
-        else
-        {
-            wxMBConvUTF16 converter;
-            size_t cbUniBuffer = converter.FromWChar( NULL, 0, src, srcSize );
-            wxASSERT(cbUniBuffer % sizeof(UniChar));
-
-            // Will be free'd by kCFAllocatorMalloc when CFString is released
-            UniChar *tmpUniBuffer = (UniChar*)malloc(cbUniBuffer);
-
-            cbUniBuffer = converter.FromWChar( (char*) tmpUniBuffer, cbUniBuffer, src, srcSize );
-            wxASSERT(cbUniBuffer % sizeof(UniChar));
-
-            theString = wxCFRef<CFStringRef>(CFStringCreateWithCharactersNoCopy(
-                        kCFAllocatorDefault,
-                        tmpUniBuffer,
-                        cbUniBuffer / sizeof(UniChar),
-                        kCFAllocatorMalloc
-                    ));
-
-        }

        wxCHECK(theString != NULL, wxCONV_FAILED);

-        CFIndex usedBufLen;
+        if ( m_normalization & FromWChar_D )
+        {
+            wxCFRef<CFMutableStringRef> normalizedFormD = CFStringCreateMutableCopy(kCFAllocatorDefault,0,theString);
+            CFStringNormalize(normalizedFormD, kCFStringNormalizationFormD);
+            theString = normalizedFormD;
+        }

+        CFIndex usedBufLen;
        CFIndex charsConverted = CFStringGetBytes(
                theString,
                CFRangeMake(0, CFStringGetLength(theString)),