diff --git a/include/wx/uilocale.h b/include/wx/uilocale.h index 4bd9d49ce5..2f3e3f1ac6 100644 --- a/include/wx/uilocale.h +++ b/include/wx/uilocale.h @@ -33,17 +33,14 @@ enum class WXDLLIMPEXP_BASE wxLocaleIdent { public: + // Create the object from BCP 47-like language tag: the string must contain + // at least the language part (2 or 3 ASCII letters) and may contain script + // and region separated by dashes. + static wxLocaleIdent FromTag(const wxString& tag); + // Default ctor creates an empty, invalid identifier. wxLocaleIdent() { } - // Construct from language, i.e. a two-letter ISO 639-1 code (or a - // three-letter ISO 639-2 code if there is no ISO 639-1 code for this - // language). - wxLocaleIdent(const char* language) - : m_language(wxString::FromAscii(language)) - { - } - // Set language wxLocaleIdent& Language(const wxString& language); @@ -69,6 +66,11 @@ public: // Construct platform dependent name wxString GetName() const; + // Get the language tag: for the objects created with FromTag() returns the + // string passed to it directly, otherwise reconstructs this string from + // the components. + wxString GetTag() const; + // Empty locale identifier is invalid. at least Language() must be called. bool IsEmpty() const { @@ -76,6 +78,8 @@ public: } private: + wxString m_tag; + wxString m_language; wxString m_region; wxString m_script; diff --git a/interface/wx/uilocale.h b/interface/wx/uilocale.h index 988acfd756..c6cd0f7a52 100644 --- a/interface/wx/uilocale.h +++ b/interface/wx/uilocale.h @@ -195,25 +195,66 @@ public: wxString wxGetUIDateFormat(); /** - Allows to construct the full locale identifier in a portable way. + Represents a locale in a portable way. - Parts of the locale not supported by the current platform (e.g. modifier under non-Unix platforms) are ignored. - The remaining parts are used to construct a string uniquely identifying the locale in a platform-specific name. 
+ There are two possible ways to construct wxLocaleIdent: - Usage example: + - You can either use FromTag() to create it from a string in the form + @code language ["-" script] ["-" region] @endcode, corresponding to + the subset of BCP 47 (https://www.rfc-editor.org/rfc/bcp/bcp47.txt) + syntax. + - Or you can create it from the different parts of this string by using + the default constructor and then chaining calls to Language(), + Region(), Script() and other methods. + + The first method is useful for interoperating with the other software using + BCP 47 language tags, while the second one may result in more readable + code and allows to specify Unix-specific locale description parts such as + charset and modifier that are not part of the BCP 47 strings. + + Example of using wxLocaleIdent in the second way: @code - auto loc = wxLocaleIdent("fr").Region("BE").Modifier("euro"); + auto loc = wxLocaleIdent().Language("fr").Region("BE").Modifier("euro"); #if defined(__WINDOWS__) || defined(__WXOSX__) wxASSERT( loc.GetName() == "fr_BE" ); #elif defined(__UNIX__) wxASSERT( loc.GetName() == "fr_BE@euro" ); #endif @endcode + + For the first way, it is enough to just write + @code + auto loc = wxLocaleIdent::FromTag("fr-BE"); // Dash, not underscore! + @endcode + @since 3.1.6 */ class wxLocaleIdent { public: + /** + Return the locale identifier corresponding to the given BCP 47-like tag. + + The string must contain at least the language part (2 or 3 ASCII + letters) and may contain script and region separated by dashes, i.e. + all of the following are valid: + + - "mn" + - "mn-MN" + - "mn-Cyrl-MN" + + Note that while BCP 47 extlangs, variants, extensions, private use and + grandfathered tags are currently not directly supported, they may still + work for creating wxUILocale on platforms with native support for BCP + 47 strings. + + If the input argument uses an unrecognized syntax (e.g. is empty), an + empty wxLocaleIdent is returned. 
+ Of course, even if this function + returns a non-empty object, the resulting locale may still be invalid + or unsupported; use wxUILocale::IsSupported() to check for this. + */ + static wxLocaleIdent FromTag(const wxString& tag); + /** Default constructor creates an empty and invalid locale identifier. @@ -221,18 +262,6 @@ public: */ wxLocaleIdent(); - /** - Constructor with language. - - Note that this constructor is non-explicit, allowing to pass just a - simple string, such as "en", to functions taking wxLocaleIdent. - - @param language - ISO 639 language code. - See Language() for more detailed info. - */ - wxLocaleIdent(const char* language); - /** Set language. diff --git a/src/common/uilocale.cpp b/src/common/uilocale.cpp index 4ec4804497..4bd7615ae7 100644 --- a/src/common/uilocale.cpp +++ b/src/common/uilocale.cpp @@ -22,6 +22,8 @@ #include "wx/uilocale.h" +#include "wx/arrstr.h" + #ifndef __WINDOWS__ #include "wx/language.h" #endif @@ -45,6 +47,96 @@ wxUILocale wxUILocale::ms_current; // wxLocaleIdent // ---------------------------------------------------------------------------- +/* static */ +wxLocaleIdent wxLocaleIdent::FromTag(const wxString& tag) +{ + // See section 2.1 of https://www.rfc-editor.org/rfc/bcp/bcp47.txt for the + // full syntax. Here we fully support just the subset we're interested in: + // + // - Normal language tags (not private use or grandfathered ones). + // - Only script and region, but not the extensions or extlangs. + + // Language tags must always use ASCII. + if ( tag != tag.ToAscii() ) + return wxLocaleIdent(); + + const wxArrayString& parts = wxSplit(tag, '-', '\0'); + wxArrayString::const_iterator it = parts.begin(); + if ( it == parts.end() ) + return wxLocaleIdent(); + + // We have at least the language, so we'll return a valid object. 
+ wxLocaleIdent locId; + locId.m_language = *it; + + // Also store the full string, so that the platforms that support BCP 47 + // natively can use it instead of reconstructing the string from our fields. + locId.m_tag = tag; + + if ( ++it == parts.end() ) + return locId; + + // Advance to the next component we know about. + switch ( locId.m_language.length() ) + { + case 2: + case 3: + // Looks like an ISO 639 code. + break; + + default: + // It may be private use or grandfathered tag or just invalid + // syntax, but in any case we can't parse it further. + return locId; + } + + // Skip extlangs that are 3 letters long, in contrast to 3 digit region + // codes. + while ( it->length() == 3 && !isdigit((*it)[0]) ) + { + if ( ++it == parts.end() ) + return locId; + } + + switch ( it->length() ) + { + case 2: + case 3: + // Either an ISO 3166-1 or UN M.49 region code. + locId.m_region = *it; + break; + + case 4: + // Must be an ISO 15924 script. + locId.m_script = *it; + break; + + default: + // This looks to be completely invalid. + return wxLocaleIdent(); + } + + // If we got the language and the region, we can't parse anything else + // (variants, extensions, private use) anyhow. + if ( !locId.m_region.empty() ) + return locId; + + // Otherwise we must have got the script above, so check if we have the + // region too. 
+ if ( ++it == parts.end() ) + return locId; + + switch ( it->length() ) + { + case 2: + case 3: + locId.m_region = *it; + break; + } + + return locId; +} + wxLocaleIdent& wxLocaleIdent::Language(const wxString& language) { m_language = language; @@ -75,6 +167,22 @@ wxLocaleIdent& wxLocaleIdent::Modifier(const wxString& modifier) return *this; } +wxString wxLocaleIdent::GetTag() const +{ + if ( !m_tag.empty() ) + return m_tag; + + wxString tag = m_language; + + if ( !m_script.empty() ) + tag << '-' << m_script; + + if ( !m_region.empty() ) + tag << '-' << m_region; + + return tag; +} + // ---------------------------------------------------------------------------- // wxUILocale // ---------------------------------------------------------------------------- diff --git a/src/msw/uilocale.cpp b/src/msw/uilocale.cpp index 409850566f..6cb557ca64 100644 --- a/src/msw/uilocale.cpp +++ b/src/msw/uilocale.cpp @@ -420,7 +420,7 @@ wxUILocaleImpl* wxUILocaleImpl::CreateForLocale(const wxLocaleIdent& locId) return NULL; } - return wxUILocaleImplName::Create(locId.GetName().wc_str()); + return wxUILocaleImplName::Create(locId.GetTag().wc_str()); } #endif // wxUSE_INTL diff --git a/src/osx/core/uilocale.mm b/src/osx/core/uilocale.mm index 1c96257a52..3b76e7a069 100644 --- a/src/osx/core/uilocale.mm +++ b/src/osx/core/uilocale.mm @@ -139,7 +139,7 @@ wxUILocaleImplCF::GetInfo(wxLocaleInfo index, wxLocaleCategory cat) const /* static */ wxUILocaleImpl* wxUILocaleImpl::CreateStdC() { - return wxUILocaleImplCF::Create(wxLocaleIdent("C")); + return wxUILocaleImplCF::Create(wxLocaleIdent().Language("C")); } /* static */ diff --git a/src/unix/uilocale.cpp b/src/unix/uilocale.cpp index 1e6306db9e..78cc020184 100644 --- a/src/unix/uilocale.cpp +++ b/src/unix/uilocale.cpp @@ -387,7 +387,7 @@ wxUILocaleImplUnix::CompareStrings(const wxString& lhs, const wxString& rhs, /* static */ wxUILocaleImpl* wxUILocaleImpl::CreateStdC() { - return new wxUILocaleImplUnix("C"); + return new 
wxUILocaleImplUnix(wxLocaleIdent().Language("C")); } /* static */ diff --git a/tests/intl/intltest.cpp b/tests/intl/intltest.cpp index bc0199efbb..e4b7131b2c 100644 --- a/tests/intl/intltest.cpp +++ b/tests/intl/intltest.cpp @@ -260,23 +260,23 @@ static inline bool CheckSupported(const wxUILocale& loc, const char* desc) TEST_CASE("wxUILocale::IsSupported", "[uilocale]") { - CheckSupported(wxUILocale("en"), "English"); - CheckSupported(wxUILocale(wxLocaleIdent("fr").Region("FR")), "French"); - CHECK( !wxUILocale("bloordyblop").IsSupported() ); + CheckSupported(wxUILocale(wxLocaleIdent::FromTag("en")), "English"); + CheckSupported(wxUILocale(wxLocaleIdent().Language("fr").Region("FR")), "French"); + CHECK( !wxUILocale(wxLocaleIdent::FromTag("bloordyblop")).IsSupported() ); } TEST_CASE("wxUILocale::GetInfo", "[uilocale]") { - CHECK( wxUILocale("en").GetInfo(wxLOCALE_DECIMAL_POINT) == "." ); + CHECK( wxUILocale(wxLocaleIdent::FromTag("en")).GetInfo(wxLOCALE_DECIMAL_POINT) == "." ); - const wxUILocale locDE("de"); + const wxUILocale locDE(wxLocaleIdent::FromTag("de")); if ( CheckSupported(locDE, "German") ) CHECK( locDE.GetInfo(wxLOCALE_DECIMAL_POINT) == "," ); // This one shows that "Swiss High German" locale (de_CH) correctly uses // dot, and not comma, as decimal separator, even under macOS, where POSIX // APIs use incorrect (identical to "German") definitions for this locale. - const wxUILocale locDE_CH(wxLocaleIdent("de").Region("CH")); + const wxUILocale locDE_CH(wxLocaleIdent().Language("de").Region("CH")); if ( CheckSupported(locDE_CH, "Swiss German") ) CHECK( locDE_CH.GetInfo(wxLOCALE_DECIMAL_POINT) == "." ); } @@ -288,7 +288,7 @@ TEST_CASE("wxUILocale::CompareStrings", "[uilocale]") { SECTION("English") { - const wxUILocale l("en"); + const wxUILocale l(wxLocaleIdent::FromTag("en")); // This is not very interesting, but check that comparison works at all. 
CHECK( l.CompareStrings("x", "x") == 0 ); @@ -319,7 +319,7 @@ TEST_CASE("wxUILocale::CompareStrings", "[uilocale]") #if wxUSE_UNICODE SECTION("German") { - const wxUILocale l(wxLocaleIdent("de").Region("DE")); + const wxUILocale l(wxLocaleIdent().Language("de").Region("DE")); if ( !CheckSupported(l, "German") ) return; @@ -344,7 +344,7 @@ TEST_CASE("wxUILocale::CompareStrings", "[uilocale]") if ( wxIsRunningUnderWine() ) return; - const wxUILocale l("sv"); + const wxUILocale l(wxLocaleIdent::FromTag("sv")); if ( !CheckSupported(l, "Swedish") ) return;