From ab858b58053c925c7165322fe1975355f1fe8596 Mon Sep 17 00:00:00 2001 From: Stefan Csomor Date: Mon, 13 Jul 2020 12:05:29 +0200 Subject: [PATCH] Produce NFD in wxConvFileName::FromWChar() on macOS This ensures that fn_str() returns the string in the expected, decomposed, format. Also simplify the code by removing workarounds for old systems which are not supported any more and make explicit the fact that under macOS ToWChar() always produced NFC. --- include/wx/osx/core/private/strconv_cf.h | 31 +++++++-- src/common/strconv.cpp | 2 +- src/osx/core/strconv_cf.cpp | 84 +++++------------------- 3 files changed, 42 insertions(+), 75 deletions(-) diff --git a/include/wx/osx/core/private/strconv_cf.h b/include/wx/osx/core/private/strconv_cf.h index e39a9218ca..e61b5a00f8 100644 --- a/include/wx/osx/core/private/strconv_cf.h +++ b/include/wx/osx/core/private/strconv_cf.h @@ -290,35 +290,44 @@ inline CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding) class wxMBConv_cf : public wxMBConv { public: + enum NormalizationForm + { + None = 0x00, + FromWChar_D = 0x01, + ToWChar_C = 0x02 + }; + wxMBConv_cf() { - Init(CFStringGetSystemEncoding()) ; + Init(CFStringGetSystemEncoding(), ToWChar_C) ; } wxMBConv_cf(const wxMBConv_cf& conv) : wxMBConv() { m_encoding = conv.m_encoding; + m_normalization = conv.m_normalization; } #if wxUSE_FONTMAP - wxMBConv_cf(const char* name) + wxMBConv_cf(const char* name, NormalizationForm normalization = ToWChar_C) { - Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ; + Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) , normalization) ; } #endif - wxMBConv_cf(wxFontEncoding encoding) + wxMBConv_cf(wxFontEncoding encoding, NormalizationForm normalization = ToWChar_C ) { - Init( wxCFStringEncFromFontEnc(encoding) ); + Init( wxCFStringEncFromFontEnc(encoding) , normalization); } virtual ~wxMBConv_cf() { } - void Init( CFStringEncoding encoding) + void Init( CFStringEncoding encoding, NormalizationForm normalization ) { m_encoding = encoding ; + m_normalization = normalization; } virtual size_t ToWChar(wchar_t * dst, size_t dstSize, const char * src, size_t srcSize = wxNO_LEN) const wxOVERRIDE; @@ -333,9 +342,19 @@ public: } private: + NormalizationForm m_normalization ; CFStringEncoding m_encoding ; }; +// This "decomposing" converter is used as wxConvFileName in wxOSX. +class wxMBConvD_cf : public wxMBConv_cf +{ +public: + wxMBConvD_cf(wxFontEncoding encoding) : wxMBConv_cf(encoding, (NormalizationForm) (ToWChar_C | FromWChar_D) ) + { + } +}; + // corresponding class for holding UniChars (native unicode characters) class WXDLLIMPEXP_BASE wxMacUniCharBuffer diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index ba04f3c144..ba25dae157 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -3307,7 +3307,7 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr(); // It is important to use this conversion object under Darwin as it ensures // that Unicode strings are (re)composed correctly even though xnu kernel uses // decomposed form internally (at least for the file names). -static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8); +static wxMBConvD_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8); #endif WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = diff --git a/src/osx/core/strconv_cf.cpp b/src/osx/core/strconv_cf.cpp index 18f232bdbe..dfdca0fbc7 100644 --- a/src/osx/core/strconv_cf.cpp +++ b/src/osx/core/strconv_cf.cpp @@ -89,13 +89,16 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding) if ( theString == NULL ) return wxCONV_FAILED; - // Ensure that the string is in canonical composed form (NFC): this is - // important because Darwin uses decomposed form (NFD) for e.g. file - // names but we want to use NFC internally. - wxCFRef + if ( m_normalization & ToWChar_C ) + { + // Ensure that the string is in canonical composed form (NFC): this is + // important because Darwin uses decomposed form (NFD) for e.g. file + // names but we want to use NFC internally. + wxCFRef cfMutableString(CFStringCreateMutableCopy(NULL, 0, theString)); - CFStringNormalize(cfMutableString, kCFStringNormalizationFormC); - theString = cfMutableString; + CFStringNormalize(cfMutableString, kCFStringNormalizationFormC); + theString = cfMutableString; + } /* NOTE: The string content includes the NULL element if the source string did * That means we have to do nothing special because the destination will have @@ -103,13 +106,6 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding) * in the count iff it was included in the source count. */ - -/* If we're compiling against Tiger headers we can support direct conversion - * to UTF32. If we are then run against a pre-Tiger system, the encoding - * won't be available so we'll defer to the string->UTF-16->UTF-32 conversion. - */ - if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT)) - { CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString)); CFIndex usedBufLen; @@ -135,34 +131,7 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding) // CFStringGetBytes does exactly the right thing when buffer // pointer is NULL and returns the number of bytes required return usedBufLen / sizeof(wchar_t); - } - else - { - // NOTE: Includes NULL iff source did - /* NOTE: This is an approximation. The eventual UTF-32 will - * possibly have less elements but certainly not more. - */ - size_t returnSize = CFStringGetLength(theString); - if (dstSize == 0 || dst == NULL) - { - return returnSize; - } - - // Convert the entire string.. too hard to figure out how many UTF-16 we'd need - // for an undersized UTF-32 destination buffer. - CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString)); - UniChar *szUniCharBuffer = new UniChar[fullStringRange.length]; - - CFStringGetCharacters(theString, fullStringRange, szUniCharBuffer); - - wxMBConvUTF16 converter; - returnSize = converter.ToWChar( dst, dstSize, (const char*)szUniCharBuffer, fullStringRange.length ); - delete [] szUniCharBuffer; - - return returnSize; - } - // NOTREACHED } size_t wxMBConv_cf::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcSize) const @@ -175,44 +144,23 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding) // Temporary CFString wxCFRef theString; -/* If we're compiling against Tiger headers we can support direct conversion - * from UTF32. If we are then run against a pre-Tiger system, the encoding - * won't be available so we'll defer to the UTF-32->UTF-16->string conversion. - */ - if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT)) - { theString = wxCFRef(CFStringCreateWithBytes( kCFAllocatorDefault, reinterpret_cast(src), srcSize * sizeof(wchar_t), wxCFStringEncodingWcharT, false)); - } - else - { - wxMBConvUTF16 converter; - size_t cbUniBuffer = converter.FromWChar( NULL, 0, src, srcSize ); - wxASSERT(cbUniBuffer % sizeof(UniChar)); - - // Will be free'd by kCFAllocatorMalloc when CFString is released - UniChar *tmpUniBuffer = (UniChar*)malloc(cbUniBuffer); - - cbUniBuffer = converter.FromWChar( (char*) tmpUniBuffer, cbUniBuffer, src, srcSize ); - wxASSERT(cbUniBuffer % sizeof(UniChar)); - - theString = wxCFRef(CFStringCreateWithCharactersNoCopy( - kCFAllocatorDefault, - tmpUniBuffer, - cbUniBuffer / sizeof(UniChar), - kCFAllocatorMalloc - )); - - } wxCHECK(theString != NULL, wxCONV_FAILED); - CFIndex usedBufLen; + if ( m_normalization & FromWChar_D ) + { + wxCFRef normalizedFormD = CFStringCreateMutableCopy(kCFAllocatorDefault,0,theString); + CFStringNormalize(normalizedFormD, kCFStringNormalizationFormD); + theString = normalizedFormD; + } + CFIndex usedBufLen; CFIndex charsConverted = CFStringGetBytes( theString, CFRangeMake(0, CFStringGetLength(theString)),