rewrote wxLocale to work in Unicode mode and support charsets better

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@14617 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík
2002-03-15 23:28:15 +00:00
parent 98f2b8145f
commit 563d535e46
2 changed files with 257 additions and 256 deletions

View File

@@ -465,7 +465,7 @@ public:
// //
// domains are searched in the last to first order, i.e. catalogs // domains are searched in the last to first order, i.e. catalogs
// added later override those added before. // added later override those added before.
const wxMB2WXbuf GetString(const wxChar *szOrigString, const wxChar *GetString(const wxChar *szOrigString,
const wxChar *szDomain = (const wxChar *) NULL) const; const wxChar *szDomain = (const wxChar *) NULL) const;
// Returns the current short name for the locale // Returns the current short name for the locale
@@ -507,13 +507,13 @@ private:
extern WXDLLEXPORT wxLocale* wxGetLocale(); extern WXDLLEXPORT wxLocale* wxGetLocale();
// get the translation of the string in the current locale // get the translation of the string in the current locale
inline const wxMB2WXbuf wxGetTranslation(const wxChar *sz) inline const wxChar *wxGetTranslation(const wxChar *sz)
{ {
wxLocale *pLoc = wxGetLocale(); wxLocale *pLoc = wxGetLocale();
if (pLoc) if (pLoc)
return pLoc->GetString(sz); return pLoc->GetString(sz);
else else
return (const wxMB2WXbuf)sz; return sz;
} }
#else // !wxUSE_INTL #else // !wxUSE_INTL

View File

@@ -53,6 +53,7 @@
#include "wx/module.h" #include "wx/module.h"
#include "wx/fontmap.h" #include "wx/fontmap.h"
#include "wx/encconv.h" #include "wx/encconv.h"
#include "wx/hashmap.h"
#ifdef __WIN32__ #ifdef __WIN32__
#include "wx/msw/private.h" #include "wx/msw/private.h"
@@ -69,30 +70,8 @@
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// this should *not* be wxChar, this type must have exactly 8 bits! // this should *not* be wxChar, this type must have exactly 8 bits!
typedef unsigned char size_t8; typedef wxUint8 size_t8;
typedef wxUint32 size_t32;
#ifdef __WXMSW__
#if defined(__WIN16__)
typedef unsigned long size_t32;
#elif defined(__WIN32__)
typedef unsigned int size_t32;
#else
// Win64 will have different type sizes
#error "Please define a 32 bit type"
#endif
#else // !Windows
// SIZEOF_XXX are defined by configure
#if defined(SIZEOF_INT) && (SIZEOF_INT == 4)
typedef unsigned int size_t32;
#elif defined(SIZEOF_LONG) && (SIZEOF_LONG == 4)
typedef unsigned long size_t32;
#else
// assume sizeof(int) == 4 - what else can we do
wxCOMPILE_TIME_ASSERT( sizeof(int) == 4, IntMustBeExactly4Bytes);
typedef unsigned int size_t32;
#endif
#endif // Win/!Win
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// constants // constants
@@ -161,32 +140,27 @@ static inline wxString ExtractNotLang(const wxString& langFull)
#endif // __UNIX__ #endif // __UNIX__
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// wxMsgCatalog corresponds to one disk-file message catalog. // wxMsgCatalogFile corresponds to one disk-file message catalog.
// //
// This is a "low-level" class and is used only by wxLocale (that's why // This is a "low-level" class and is used only by wxMsgCatalog
// it's designed to be stored in a linked list)
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
class wxMsgCatalog WX_DECLARE_STRING_HASH_MAP(wxString, wxMessagesHash)
class wxMsgCatalogFile
{ {
public: public:
// ctor & dtor // ctor & dtor
wxMsgCatalog(); wxMsgCatalogFile();
~wxMsgCatalog(); ~wxMsgCatalogFile();
// load the catalog from disk (szDirPrefix corresponds to language) // load the catalog from disk (szDirPrefix corresponds to language)
bool Load(const wxChar *szDirPrefix, const wxChar *szName, bool bConvertEncoding = FALSE); bool Load(const wxChar *szDirPrefix, const wxChar *szName);
bool IsLoaded() const { return m_pData != NULL; }
// get name of the catalog // fills the hash with string-translation pairs
const wxChar *GetName() const { return m_pszName; } void FillHash(wxMessagesHash& hash, bool convertEncoding) const;
// get the translated string: returns NULL if not found
const char *GetString(const char *sz) const;
// public variable pointing to the next element in a linked list (or NULL)
wxMsgCatalog *m_pNext;
private: private:
// this implementation is binary compatible with GNU gettext() version 0.10 // this implementation is binary compatible with GNU gettext() version 0.10
@@ -214,31 +188,48 @@ private:
size_t8 *m_pData; size_t8 *m_pData;
// data description // data description
size_t32 m_numStrings, // number of strings in this domain size_t32 m_numStrings; // number of strings in this domain
m_nHashSize; // number of entries in hash table
size_t32 *m_pHashTable; // pointer to hash table
wxMsgTableEntry *m_pOrigTable, // pointer to original strings wxMsgTableEntry *m_pOrigTable, // pointer to original strings
*m_pTransTable; // translated *m_pTransTable; // translated
const char *StringAtOfs(wxMsgTableEntry *pTable, size_t32 index) const const char *StringAtOfs(wxMsgTableEntry *pTable, size_t32 index) const
{ return (const char *)(m_pData + Swap(pTable[index].ofsString)); } { return (const char *)(m_pData + Swap(pTable[index].ofsString)); }
// convert encoding to platform native one, if neccessary wxString GetCharset() const;
void ConvertEncoding();
// utility functions // utility functions
// calculate the hash value of given string
static size_t32 GetHash(const char *sz);
// big<->little endian // big<->little endian
inline size_t32 Swap(size_t32 ui) const; inline size_t32 Swap(size_t32 ui) const;
// internal state
bool HasHashTable() const // true if hash table is present
{ return m_nHashSize > 2 && m_pHashTable != NULL; }
bool m_bSwapped; // wrong endianness? bool m_bSwapped; // wrong endianness?
};
wxChar *m_pszName; // name of the domain
// ----------------------------------------------------------------------------
// wxMsgCatalog corresponds to one loaded message catalog.
//
// This is a "low-level" class and is used only by wxLocale (that's why
// it's designed to be stored in a linked list)
// ----------------------------------------------------------------------------
class wxMsgCatalog
{
public:
// load the catalog from disk (szDirPrefix corresponds to language)
bool Load(const wxChar *szDirPrefix, const wxChar *szName, bool bConvertEncoding = FALSE);
// get name of the catalog
wxString GetName() const { return m_name; }
// get the translated string: returns NULL if not found
const wxChar *GetString(const wxChar *sz) const;
// public variable pointing to the next element in a linked list (or NULL)
wxMsgCatalog *m_pNext;
private:
wxMessagesHash m_messages; // all messages in the catalog
wxString m_name; // name of the domain
}; };
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@@ -253,48 +244,25 @@ static wxArrayString s_searchPrefixes;
// ============================================================================ // ============================================================================
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// wxMsgCatalog class // wxMsgCatalogFile class
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// calculate hash value using the so called hashpjw function by P.J. Weinberger
// [see Aho/Sethi/Ullman, COMPILERS: Principles, Techniques and Tools]
size_t32 wxMsgCatalog::GetHash(const char *sz)
{
#define HASHWORDBITS 32 // the length of size_t32
size_t32 hval = 0;
size_t32 g;
while ( *sz != '\0' ) {
hval <<= 4;
hval += (size_t32)*sz++;
g = hval & ((size_t32)0xf << (HASHWORDBITS - 4));
if ( g != 0 ) {
hval ^= g >> (HASHWORDBITS - 8);
hval ^= g;
}
}
return hval;
}
// swap the 2 halves of 32 bit integer if needed // swap the 2 halves of 32 bit integer if needed
size_t32 wxMsgCatalog::Swap(size_t32 ui) const size_t32 wxMsgCatalogFile::Swap(size_t32 ui) const
{ {
return m_bSwapped ? (ui << 24) | ((ui & 0xff00) << 8) | return m_bSwapped ? (ui << 24) | ((ui & 0xff00) << 8) |
((ui >> 8) & 0xff00) | (ui >> 24) ((ui >> 8) & 0xff00) | (ui >> 24)
: ui; : ui;
} }
wxMsgCatalog::wxMsgCatalog() wxMsgCatalogFile::wxMsgCatalogFile()
{ {
m_pData = NULL; m_pData = NULL;
m_pszName = NULL;
} }
wxMsgCatalog::~wxMsgCatalog() wxMsgCatalogFile::~wxMsgCatalogFile()
{ {
wxDELETEA(m_pData); wxDELETEA(m_pData);
wxDELETEA(m_pszName);
} }
// return all directories to search for given prefix // return all directories to search for given prefix
@@ -352,7 +320,7 @@ static wxString GetFullSearchPath(const wxChar *lang)
} }
// open disk file and read in it's contents // open disk file and read in it's contents
bool wxMsgCatalog::Load(const wxChar *szDirPrefix, const wxChar *szName0, bool bConvertEncoding) bool wxMsgCatalogFile::Load(const wxChar *szDirPrefix, const wxChar *szName0)
{ {
/* We need to handle locales like de_AT.iso-8859-1 /* We need to handle locales like de_AT.iso-8859-1
For this we first chop off the .CHARSET specifier and ignore it. For this we first chop off the .CHARSET specifier and ignore it.
@@ -439,114 +407,152 @@ bool wxMsgCatalog::Load(const wxChar *szDirPrefix, const wxChar *szName0, bool b
m_pTransTable = (wxMsgTableEntry *)(m_pData + m_pTransTable = (wxMsgTableEntry *)(m_pData +
Swap(pHeader->ofsTransTable)); Swap(pHeader->ofsTransTable));
m_nHashSize = Swap(pHeader->nHashSize);
m_pHashTable = (size_t32 *)(m_pData + Swap(pHeader->ofsHashTable));
m_pszName = new wxChar[wxStrlen(szName) + 1];
wxStrcpy(m_pszName, szName);
if (bConvertEncoding)
ConvertEncoding();
// everything is fine // everything is fine
return TRUE; return TRUE;
} }
// search for a string void wxMsgCatalogFile::FillHash(wxMessagesHash& hash, bool convertEncoding) const
const char *wxMsgCatalog::GetString(const char *szOrig) const
{ {
if ( szOrig == NULL ) wxString charset = GetCharset();
return NULL;
if ( HasHashTable() ) { // use hash table for lookup if possible #if wxUSE_WCHAR_T
size_t32 nHashVal = GetHash(szOrig); wxCSConv *csConv = NULL;
size_t32 nIndex = nHashVal % m_nHashSize; if ( !!charset )
csConv = new wxCSConv(charset);
size_t32 nIncr = 1 + (nHashVal % (m_nHashSize - 2)); wxMBConv& inputConv = csConv ? *csConv : *wxConvCurrent;
for ( ;; ) { for (size_t i = 0; i < m_numStrings; i++)
size_t32 nStr = Swap(m_pHashTable[nIndex]); {
if ( nStr == 0 ) wxString key(StringAtOfs(m_pOrigTable, i), inputConv);
return NULL;
if ( strcmp(szOrig, StringAtOfs(m_pOrigTable, nStr - 1)) == 0 ) { #if wxUSE_UNICODE
// work around for BC++ 5.5 bug: without a temp var, the optimizer hash[key] = wxString(StringAtOfs(m_pTransTable, i), inputConv);
// breaks the code and the return value is incorrect #else
const char *tmp = StringAtOfs(m_pTransTable, nStr - 1); if ( convertEncoding )
return tmp; hash[key] =
} wxString(inputConv.cMB2WC(StringAtOfs(m_pTransTable, i)),
wxConvLocal);
if ( nIndex >= m_nHashSize - nIncr)
nIndex -= m_nHashSize - nIncr;
else else
nIndex += nIncr; hash[key] = StringAtOfs(m_pTransTable, i);
#endif
}
delete csConv;
#else // !wxUSE_WCHAR_T
#if wxUSE_FONTMAP
if ( convertEncoding )
{
wxFontEncoding enc = wxTheFontMapper->CharsetToEncoding(charset, FALSE);
if ( enc == wxFONTENCODING_SYSTEM )
{
convertEncoding = FALSE; // unknown encoding
}
else
{
wxFontEncoding targetEnc = wxLocale::GetSystemEncoding();
if (targetEnc == wxFONTENCODING_SYSTEM)
{
wxFontEncodingArray a = wxEncodingConverter::GetPlatformEquivalents(enc);
if (a[0] == enc)
// no conversion needed, locale uses native encoding
convertEncoding = FALSE;
if (a.GetCount() == 0)
// we don't know common equiv. under this platform
convertEncoding = FALSE;
targetEnc = a[0];
} }
} }
else { // no hash table: use default binary search
size_t32 bottom = 0, if ( convertEncoding )
top = m_numStrings, {
current; wxEncodingConverter converter;
while ( bottom < top ) { converter.Init(enc, targetEnc);
current = (bottom + top) / 2;
int res = strcmp(szOrig, StringAtOfs(m_pOrigTable, current)); for (size_t i = 0; i < m_numStrings; i++)
if ( res < 0 ) {
top = current; wxString key(StringAtOfs(m_pOrigTable, i));
else if ( res > 0 ) hash[key] =
bottom = current + 1; converter.Convert(wxString(StringAtOfs(m_pTransTable, i)));
else { // found!
// work around the same BC++ 5.5 bug as above
const char *tmp = StringAtOfs(m_pTransTable, current);
return tmp;
} }
} }
} }
// not found if ( !convertEncoding )
return NULL; #else // !wxUSE_FONTMAP
{
for (size_t i = 0; i < m_numStrings; i++)
{
wxString key(StringAtOfs(m_pOrigTable, i));
hash[key] = StringAtOfs(m_pTransTable, i);
}
}
#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
} }
void wxMsgCatalog::ConvertEncoding() wxString wxMsgCatalogFile::GetCharset() const
{ {
// first, find encoding header: // first, find encoding header:
const char *hdr = StringAtOfs(m_pOrigTable, 0); const char *hdr = StringAtOfs(m_pOrigTable, 0);
if ( hdr == NULL || hdr[0] != 0 ) { if ( hdr == NULL || hdr[0] != 0 )
{
// not supported by this catalog, does not have correct header // not supported by this catalog, does not have correct header
return; return wxEmptyString;
} }
wxString header(StringAtOfs(m_pTransTable, 0)); wxString header(StringAtOfs(m_pTransTable, 0));
wxString charset; wxString charset;
int pos = header.Find(wxT("Content-Type: text/plain; charset=")); int pos = header.Find(wxT("Content-Type: text/plain; charset="));
if ( pos == wxNOT_FOUND ) if ( pos == wxNOT_FOUND )
return; // incorrectly filled Content-Type header {
// incorrectly filled Content-Type header
return wxEmptyString;
}
size_t n = pos + 34; /*strlen("Content-Type: text/plain; charset=")*/ size_t n = pos + 34; /*strlen("Content-Type: text/plain; charset=")*/
while ( header[n] != wxT('\n') ) while ( header[n] != wxT('\n') )
charset << header[n++]; charset << header[n++];
#if wxUSE_FONTMAP if ( charset == wxT("CHARSET") )
wxFontEncoding enc = wxTheFontMapper->CharsetToEncoding(charset, FALSE);
if ( enc == wxFONTENCODING_SYSTEM )
return; // unknown encoding
wxFontEncoding targetEnc = wxLocale::GetSystemEncoding();
if (targetEnc == wxFONTENCODING_SYSTEM)
{ {
wxFontEncodingArray a = wxEncodingConverter::GetPlatformEquivalents(enc); // "CHARSET" is not valid charset, but lazy translator
if (a[0] == enc) return wxEmptyString;
return; // no conversion needed, locale uses native encoding
if (a.GetCount() == 0)
return; // we don't know common equiv. under this platform
targetEnc = a[0];
} }
wxEncodingConverter converter; return charset;
converter.Init(enc, targetEnc);
for (size_t i = 0; i < m_numStrings; i++)
converter.Convert((char*)StringAtOfs(m_pTransTable, i));
#endif // wxUSE_FONTMAP
} }
// ----------------------------------------------------------------------------
// wxMsgCatalog class
// ----------------------------------------------------------------------------
// Load the message catalog szName for the language directory szDirPrefix and
// copy all of its original/translated string pairs into m_messages.
//
// szDirPrefix       passed through unchanged to wxMsgCatalogFile::Load()
// szName            name of the catalog; also stored as this catalog's name
// bConvertEncoding  passed through unchanged to wxMsgCatalogFile::FillHash()
//
// Returns TRUE if the catalog file was loaded and the hash filled, FALSE if
// wxMsgCatalogFile::Load() failed.
//
// NB: the default value of bConvertEncoding is specified on the declaration
//     inside the class; it must not be repeated here because C++ does not
//     allow a default argument to be redefined in a later declaration.
bool wxMsgCatalog::Load(const wxChar *szDirPrefix, const wxChar *szName,
                        bool bConvertEncoding)
{
    wxMsgCatalogFile file;

    // the name is recorded before the load is attempted, so it is set even
    // if loading fails
    m_name = szName;

    if ( !file.Load(szDirPrefix, szName) )
        return FALSE;

    file.FillHash(m_messages, bConvertEncoding);

    return TRUE;
}
// Look up the translation of sz among the messages stored in this catalog.
//
// Returns the c_str() of the matching entry held in m_messages, or NULL if
// the catalog has no entry for sz.
const wxChar *wxMsgCatalog::GetString(const wxChar *sz) const
{
    wxMessagesHash::const_iterator entry = m_messages.find(sz);

    return ( entry != m_messages.end() ) ? entry->second.c_str() : NULL;
}
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// wxLocale // wxLocale
@@ -1377,39 +1383,39 @@ wxLocale::~wxLocale()
} }
// get the translation of given string in current locale // get the translation of given string in current locale
const wxMB2WXbuf wxLocale::GetString(const wxChar *szOrigString, const wxChar *wxLocale::GetString(const wxChar *szOrigString,
const wxChar *szDomain) const const wxChar *szDomain) const
{ {
if ( wxIsEmpty(szOrigString) ) if ( wxIsEmpty(szOrigString) )
return _T(""); return _T("");
const char *pszTrans = NULL; const wxChar *pszTrans = NULL;
#if wxUSE_UNICODE
const wxWX2MBbuf szOrgString = wxConvCurrent->cWX2MB(szOrigString);
#else // ANSI
#define szOrgString szOrigString
#endif // Unicode/ANSI
wxMsgCatalog *pMsgCat; wxMsgCatalog *pMsgCat;
if ( szDomain != NULL ) {
if ( szDomain != NULL )
{
pMsgCat = FindCatalog(szDomain); pMsgCat = FindCatalog(szDomain);
// does the catalog exist? // does the catalog exist?
if ( pMsgCat != NULL ) if ( pMsgCat != NULL )
pszTrans = pMsgCat->GetString(szOrgString); pszTrans = pMsgCat->GetString(szOrigString);
} }
else { else
{
// search in all domains // search in all domains
for ( pMsgCat = m_pMsgCat; pMsgCat != NULL; pMsgCat = pMsgCat->m_pNext ) { for ( pMsgCat = m_pMsgCat; pMsgCat != NULL; pMsgCat = pMsgCat->m_pNext )
pszTrans = pMsgCat->GetString(szOrgString); {
pszTrans = pMsgCat->GetString(szOrigString);
if ( pszTrans != NULL ) // take the first found if ( pszTrans != NULL ) // take the first found
break; break;
} }
} }
if ( pszTrans == NULL ) { if ( pszTrans == NULL )
{
#ifdef __WXDEBUG__ #ifdef __WXDEBUG__
if ( !NoTransErr::Suppress() ) { if ( !NoTransErr::Suppress() )
{
NoTransErr noTransErr; NoTransErr noTransErr;
if ( szDomain != NULL ) if ( szDomain != NULL )
@@ -1425,16 +1431,10 @@ const wxMB2WXbuf wxLocale::GetString(const wxChar *szOrigString,
} }
#endif // __WXDEBUG__ #endif // __WXDEBUG__
return (wxMB2WXbuf)(szOrigString); return szOrigString;
} }
// or preferably wxCSConv(charset).cMB2WX(pszTrans) or something, a macro return pszTrans;
// similar to wxConvertMB2WX could be written for that
return wxConvertMB2WX(pszTrans);
// undo the hack from the beginning of this function
#undef szOrgString
} }
// find catalog by name in a linked list, return NULL if !found // find catalog by name in a linked list, return NULL if !found
@@ -1442,7 +1442,8 @@ wxMsgCatalog *wxLocale::FindCatalog(const wxChar *szDomain) const
{ {
// linear search in the linked list // linear search in the linked list
wxMsgCatalog *pMsgCat; wxMsgCatalog *pMsgCat;
for ( pMsgCat = m_pMsgCat; pMsgCat != NULL; pMsgCat = pMsgCat->m_pNext ) { for ( pMsgCat = m_pMsgCat; pMsgCat != NULL; pMsgCat = pMsgCat->m_pNext )
{
if ( wxStricmp(pMsgCat->GetName(), szDomain) == 0 ) if ( wxStricmp(pMsgCat->GetName(), szDomain) == 0 )
return pMsgCat; return pMsgCat;
} }