Added 32-bit (UCS-4) wxUString class

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54802 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Robert Roebling
2008-07-29 11:01:16 +00:00
parent dfbe9d4f59
commit 9a6d14383a
8 changed files with 1622 additions and 2 deletions

View File

@@ -408,6 +408,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
src/common/txtstrm.cpp
src/common/unichar.cpp
src/common/uri.cpp
src/common/ustring.cpp
src/common/variant.cpp
src/common/wfstream.cpp
src/common/wxcrt.cpp
@@ -534,6 +535,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
wx/types.h
wx/unichar.h
wx/uri.h
wx/ustring.h
wx/utils.h
wx/variant.h
wx/vector.h

View File

@@ -145,7 +145,7 @@
typedef char wxChar;
typedef signed char wxSChar;
typedef unsigned char wxUChar;
#else /* Unicode */
#else
/* VZ: note that VC++ defines _T[SU]CHAR simply as wchar_t and not as */
/* signed/unsigned version of it which (a) makes sense to me (unlike */
/* char wchar_t is always unsigned) and (b) was how the previous */

View File

@@ -1273,6 +1273,24 @@ typedef double wxDouble;
*/
#define wxNullPtr ((void *)NULL)
/* Define wxChar16 and wxChar32 */
#if wxUSE_WCHAR_T && (!defined(SIZEOF_WCHAR_T) || (SIZEOF_WCHAR_T == 2))
#define wxWCHAR_T_IS_WXCHAR16
typedef wchar_t wxChar16;
#else
typedef wxUint16 wxChar16;
#endif
#if wxUSE_WCHAR_T && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
#define wxWCHAR_T_IS_WXCHAR32
typedef wchar_t wxChar32;
#else
typedef wxUint32 wxChar32;
#endif
/* ---------------------------------------------------------------------------- */
/* byte ordering related definition and macros */
/* ---------------------------------------------------------------------------- */

724
include/wx/ustring.h Normal file
View File

@@ -0,0 +1,724 @@
/////////////////////////////////////////////////////////////////////////////
// Name: wx/ustring.h
// Purpose: 32-bit string (UCS-4)
// Author: Robert Roebling
// Copyright: (c) Robert Roebling
// RCS-ID: $Id: tab.h 37400 2006-02-09 00:28:34Z VZ $
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
#ifndef _WX_USTRING_H_BASE_
#define _WX_USTRING_H_BASE_
#include "wx/defs.h"
#include "wx/string.h"
WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxCharTypeBuffer<wxChar32> )
#if SIZEOF_WCHAR_T == 2
typedef wxWCharBuffer wxU16CharBuffer;
#else
// Buffer of wxChar16 (UTF-16 code units) used on builds where wchar_t is not
// 2 bytes wide; when it is, wxU16CharBuffer is simply a typedef for
// wxWCharBuffer (see the #if branch just above).
//
// The class adds no state of its own: each constructor forwards its argument
// unchanged to the wxCharTypeBuffer<wxChar16> base.
class WXDLLIMPEXP_BASE wxU16CharBuffer : public wxCharTypeBuffer<wxChar16>
{
public:
// Shorthand for the base class, used in the constructor initialisers below.
typedef wxCharTypeBuffer<wxChar16> wxCharTypeBufferBase;
// Construct from an existing base-class buffer.
wxU16CharBuffer(const wxCharTypeBufferBase& buf)
: wxCharTypeBufferBase(buf) {}
// Construct from a raw wxChar16 string; also serves as default constructor.
wxU16CharBuffer(const CharType *str = NULL) : wxCharTypeBufferBase(str) {}
// Construct with room for len characters.
// NOTE(review): callers in this file rely on the base class also reserving
// and writing a terminating NUL at index len - confirm in wx/buffer.h.
wxU16CharBuffer(size_t len) : wxCharTypeBufferBase(len) {}
};
#endif
#if SIZEOF_WCHAR_T == 2
// Buffer of wxChar32 (UCS-4 code points) used on builds where wchar_t is
// 2 bytes wide; otherwise wxU32CharBuffer is simply a typedef for
// wxWCharBuffer (see the #else branch just below).
//
// The class adds no state of its own: each constructor forwards its argument
// unchanged to the wxCharTypeBuffer<wxChar32> base.
class WXDLLIMPEXP_BASE wxU32CharBuffer : public wxCharTypeBuffer<wxChar32>
{
public:
// Shorthand for the base class, used in the constructor initialisers below.
typedef wxCharTypeBuffer<wxChar32> wxCharTypeBufferBase;
// Construct from an existing base-class buffer.
wxU32CharBuffer(const wxCharTypeBufferBase& buf)
: wxCharTypeBufferBase(buf) {}
// Construct from a raw wxChar32 string; also serves as default constructor.
wxU32CharBuffer(const CharType *str = NULL) : wxCharTypeBufferBase(str) {}
// Construct with room for len characters.
// NOTE(review): callers in this file rely on the base class also reserving
// and writing a terminating NUL at index len - confirm in wx/buffer.h.
wxU32CharBuffer(size_t len) : wxCharTypeBufferBase(len) {}
};
#else
typedef wxWCharBuffer wxU32CharBuffer;
#endif
class WXDLLIMPEXP_BASE wxUString: public std::basic_string<wxChar32>
{
public:
wxUString() { }
wxUString( const wxChar32 *str ) { assign(str); }
wxUString( const wxUString &str ) { assign(str); }
wxUString( const wxU32CharBuffer &buf ) { assign(buf); }
wxUString( const char *str ) { assign(str); }
wxUString( const wxCharBuffer &buf ) { assign(buf); }
wxUString( const char *str, const wxMBConv &conv ) { assign(str,conv); }
wxUString( const wxCharBuffer &buf, const wxMBConv &conv ) { assign(buf,conv); }
wxUString( const wxChar16 *str ) { assign(str); }
wxUString( const wxU16CharBuffer &buf ) { assign(buf); }
wxUString( const wxCStrData *cstr ) { assign(cstr); }
wxUString( const wxString &str ) { assign(str); }
wxUString( char ch ) { assign(ch); }
wxUString( wxChar16 ch ) { assign(ch); }
wxUString( wxChar32 ch ) { assign(ch); }
wxUString( wxUniChar ch ) { assign(ch); }
wxUString( wxUniCharRef ch ) { assign(ch); }
wxUString( size_type n, char ch ) { assign(n,ch); }
wxUString( size_type n, wxChar16 ch ) { assign(n,ch); }
wxUString( size_type n, wxChar32 ch ) { assign(n,ch); }
wxUString( size_type n, wxUniChar ch ) { assign(n,ch); }
wxUString( size_type n, wxUniCharRef ch ) { assign(n,ch); }
// static construction
static wxUString FromAscii( const char *str, size_type n )
{
wxUString ret;
ret.assignFromAscii( str, n );
return ret;
}
static wxUString FromAscii( const char *str )
{
wxUString ret;
ret.assignFromAscii( str );
return ret;
}
static wxUString FromUTF8( const char *str, size_type n )
{
wxUString ret;
ret.assignFromUTF8( str, n );
return ret;
}
static wxUString FromUTF8( const char *str )
{
wxUString ret;
ret.assignFromUTF8( str );
return ret;
}
static wxUString FromUTF16( const wxChar16 *str, size_type n )
{
wxUString ret;
ret.assignFromUTF16( str, n );
return ret;
}
static wxUString FromUTF16( const wxChar16 *str )
{
wxUString ret;
ret.assignFromUTF16( str );
return ret;
}
// assign from encoding
wxUString &assignFromAscii( const char *str );
wxUString &assignFromAscii( const char *str, size_type n );
wxUString &assignFromUTF8( const char *str );
wxUString &assignFromUTF8( const char *str, size_type n );
wxUString &assignFromUTF16( const wxChar16* str );
wxUString &assignFromUTF16( const wxChar16* str, size_type n );
wxUString &assignFromCString( const char* str );
wxUString &assignFromCString( const char* str, const wxMBConv &conv );
// conversions
wxCharBuffer utf8_str() const;
wxU16CharBuffer utf16_str() const;
#if SIZEOF_WCHAR_T == 2
wxWCharBuffer wc_str() const
{
return utf16_str();
}
#else
wchar_t *wc_str() const
{
return (wchar_t*) c_str();
}
#endif
operator wxString() const
{
#if wxUSE_UNICODE_UTF8
return wxString::FromUTF8( utf8_str() );
#else
#if SIZEOF_WCHAR_T == 2
return wxString( utf16_str() );
#else
return wxString( c_str() );
#endif
#endif
}
// wx_str(): returns the string converted to the representation selected by
// the build configuration:
//   - UTF-8 build:                     a wxCharBuffer holding UTF-8
//   - wchar_t build, 2-byte wchar_t:   a wxWCharBuffer holding UTF-16
//   - wchar_t build, wider wchar_t:    a pointer to the internal data
#if wxUSE_UNICODE_UTF8
wxCharBuffer wx_str()
{
    return utf8_str();
}
#else
#if SIZEOF_WCHAR_T == 2
wxWCharBuffer wx_str()
{
    return utf16_str();
}
#else
// c_str() yields a pointer to const data owned by this string, so the
// return type has to be const-qualified as well (returning a plain
// wchar_t* here is an ill-formed const-dropping conversion). The pointer
// refers to this string's own storage, so it must not be used after the
// string is modified or destroyed.
const wchar_t* wx_str()
{
    return c_str();
}
#endif
#endif
// assign
inline wxUString &assign( const wxChar32* str )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->assign( str );
}
inline wxUString &assign( const wxChar32* str, size_type n )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->assign( str, n );
}
inline wxUString &assign( const wxUString &str )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->assign( str );
}
inline wxUString &assign( const wxUString &str, size_type pos, size_type n )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->assign( str, pos, n );
}
inline wxUString &assign( wxChar32 ch )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->assign( (size_type) 1, ch );
}
inline wxUString &assign( size_type n, wxChar32 ch )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->assign( n, ch );
}
wxUString &assign( const wxU32CharBuffer &buf )
{
return assign( buf.data() );
}
wxUString &assign( const char *str )
{
return assignFromCString( str );
}
wxUString &assign( const wxCharBuffer &buf )
{
return assignFromCString( buf.data() );
}
wxUString &assign( const char *str, const wxMBConv &conv )
{
return assignFromCString( str, conv );
}
wxUString &assign( const wxCharBuffer &buf, const wxMBConv &conv )
{
return assignFromCString( buf.data(), conv );
}
wxUString &assign( const wxChar16 *str )
{
return assignFromUTF16( str );
}
wxUString &assign( const wxU16CharBuffer &buf )
{
return assignFromUTF16( buf.data() );
}
wxUString &assign( const wxCStrData *cstr )
{
#if SIZEOF_WCHAR_T == 2
return assignFromUTF16( cstr->AsWChar() );
#else
return assign( cstr->AsWChar() );
#endif
}
// Replaces the contents of this string with those of the wxString @a str.
//
// The conversion path depends on the build configuration: UTF-8 builds
// decode str's UTF-8 data, builds with a 2-byte wchar_t decode its UTF-16
// data, and all other builds take its wide-character data as is.
//
// Returns a reference to this string.
wxUString &assign( const wxString &str )
{
#if wxUSE_UNICODE_UTF8
    return assignFromUTF8( str.wx_str() );
#else
#if SIZEOF_WCHAR_T == 2
    return assignFromUTF16( str.wc_str() );
#else
    // Must read from the argument: calling our own wc_str() here would
    // ignore str and assign this string to itself.
    return assign( str.wc_str() );
#endif
#endif
}
wxUString &assign( char ch )
{
char buf[2];
buf[0] = ch;
buf[1] = 0;
return assignFromCString( buf );
}
wxUString &assign( size_type n, char ch )
{
wxCharBuffer buffer(n);
char *p = buffer.data();
size_type i;
for (i = 0; i < n; i++)
{
*p = ch;
p++;
}
return assignFromCString( buffer.data() );
}
wxUString &assign( wxChar16 ch )
{
wxChar16 buf[2];
buf[0] = ch;
buf[1] = 0;
return assignFromUTF16( buf );
}
wxUString &assign( size_type n, wxChar16 ch )
{
wxU16CharBuffer buffer(n);
wxChar16 *p = buffer.data();
size_type i;
for (i = 0; i < n; i++)
{
*p = ch;
p++;
}
return assignFromUTF16( buffer.data() );
}
wxUString &assign( wxUniChar ch )
{
return assign( (const wxChar32) ch.GetValue() );
}
wxUString &assign( size_type n, wxUniChar ch )
{
return assign( n, (const wxChar32) ch.GetValue() );
}
wxUString &assign( wxUniCharRef ch )
{
return assign( (const wxChar32) ch.GetValue() );
}
wxUString &assign( size_type n, wxUniCharRef ch )
{
return assign( n, (const wxChar32) ch.GetValue() );
}
// append [STL overload]
inline wxUString &append( const wxUString &s )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->append( s );
}
inline wxUString &append( const wxUString &s, size_type pos, size_type n )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->append( s, pos, n );
}
inline wxUString &append( const wxChar32* s )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->append( s );
}
inline wxUString &append( const wxChar32* s, size_type n )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->append( s, n );
}
inline wxUString &append( size_type n, wxChar32 c )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->append( n, c );
}
inline wxUString &append( wxChar32 c )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->append( 1, c );
}
// append [wx overload]
wxUString &append( const wxU16CharBuffer &buf )
{
return append( buf.data() );
}
wxUString &append( const wxU32CharBuffer &buf )
{
return append( buf.data() );
}
wxUString &append( const char *str )
{
return append( wxUString( str ) );
}
wxUString &append( const wxCharBuffer &buf )
{
return append( wxUString( buf ) );
}
wxUString &append( const wxChar16 *str )
{
return append( wxUString( str ) );
}
wxUString &append( const wxString &str )
{
return append( wxUString( str ) );
}
wxUString &append( const wxCStrData *cstr )
{
return append( wxUString( cstr ) );
}
wxUString &append( char ch )
{
char buf[2];
buf[0] = ch;
buf[1] = 0;
return append( buf );
}
wxUString &append( wxChar16 ch )
{
wxChar16 buf[2];
buf[0] = ch;
buf[1] = 0;
return append( buf );
}
wxUString &append( wxUniChar ch )
{
return append( (size_type) 1, (wxChar32) ch.GetValue() );
}
wxUString &append( wxUniCharRef ch )
{
return append( (size_type) 1, (wxChar32) ch.GetValue() );
}
// insert [STL overloads]
inline wxUString &insert( size_type pos, const wxUString &s )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->insert( pos, s );
}
inline wxUString &insert( size_type pos, const wxUString &s, size_type pos1, size_type n )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->insert( pos, s, pos1, n );
}
inline wxUString &insert( size_type pos, const wxChar32 *s )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->insert( pos, s );
}
inline wxUString &insert( size_type pos, const wxChar32 *s, size_type n )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->insert( pos, s, n );
}
inline wxUString &insert( size_type pos, size_type n, wxChar32 c )
{
std::basic_string<wxChar32> *base = this;
return (wxUString &) base->insert( pos, n, c );
}
// insert [wx overloads]
wxUString &insert( size_type n, const char *s )
{
return insert( n, wxUString( s ) );
}
wxUString &insert( size_type n, const wxChar16 *s )
{
return insert( n, wxUString( s ) );
}
wxUString &insert( size_type n, const wxCharBuffer &buf )
{
return insert( n, wxUString( buf ) );
}
wxUString &insert( size_type n, const wxU16CharBuffer &buf )
{
return insert( n, wxUString( buf ) );
}
wxUString &insert( size_type n, const wxU32CharBuffer &buf )
{
return insert( n, buf.data() );
}
wxUString &insert( size_type n, const wxString &s )
{
return insert( n, wxUString( s ) );
}
wxUString &insert( size_type n, const wxCStrData *cstr )
{
return insert( n, wxUString( cstr ) );
}
wxUString &insert( size_type n, char ch )
{
char buf[2];
buf[0] = ch;
buf[1] = 0;
return insert( n, buf );
}
wxUString &insert( size_type n, wchar_t ch )
{
wchar_t buf[2];
buf[0] = ch;
buf[1] = 0;
return insert( n, buf );
}
// insert iterator
iterator insert( iterator it, wxChar32 ch )
{
std::basic_string<wxChar32> *base = this;
return base->insert( it, ch );
}
void insert(iterator it, const_iterator first, const_iterator last)
{
std::basic_string<wxChar32> *base = this;
base->insert( it, first, last );
}
// operator =
inline wxUString& operator=(const wxUString& s)
{ return assign( s ); }
inline wxUString& operator=(const wxString& s)
{ return assign( s ); }
inline wxUString& operator=(const wxCStrData* s)
{ return assign( s ); }
inline wxUString& operator=(const char *s)
{ return assign( s ); }
inline wxUString& operator=(const wxChar16 *s)
{ return assign( s ); }
inline wxUString& operator=(const wxChar32 *s)
{ return assign( s ); }
inline wxUString& operator=(const wxCharBuffer &s)
{ return assign( s ); }
inline wxUString& operator=(const wxU16CharBuffer &s)
{ return assign( s ); }
inline wxUString& operator=(const wxU32CharBuffer &s)
{ return assign( s ); }
inline wxUString& operator=(const char ch)
{ return assign( ch ); }
inline wxUString& operator=(const wxChar16 ch)
{ return assign( ch ); }
inline wxUString& operator=(const wxChar32 ch)
{ return assign( ch ); }
inline wxUString& operator=(const wxUniChar ch)
{ return assign( ch ); }
inline wxUString& operator=(const wxUniCharRef ch)
{ return assign( ch ); }
// operator +=
inline wxUString& operator+=(const wxUString& s)
{ return append( s ); }
inline wxUString& operator+=(const wxString& s)
{ return append( s ); }
inline wxUString& operator+=(const wxCStrData* s)
{ return append( s ); }
inline wxUString& operator+=(const char *s)
{ return append( s ); }
inline wxUString& operator+=(const wxChar16 *s)
{ return append( s ); }
inline wxUString& operator+=(const wxChar32 *s)
{ return append( s ); }
inline wxUString& operator+=(const wxCharBuffer &s)
{ return append( s ); }
inline wxUString& operator+=(const wxU16CharBuffer &s)
{ return append( s ); }
inline wxUString& operator+=(const wxU32CharBuffer &s)
{ return append( s ); }
inline wxUString& operator+=(const char ch)
{ return append( ch ); }
inline wxUString& operator+=(const wxChar16 ch)
{ return append( ch ); }
inline wxUString& operator+=(const wxChar32 ch)
{ return append( ch ); }
inline wxUString& operator+=(const wxUniChar ch)
{ return append( ch ); }
inline wxUString& operator+=(const wxUniCharRef ch)
{ return append( ch ); }
};
inline wxUString operator+(const wxUString &s1, const wxUString &s2)
{ wxUString ret( s1 ); ret.append( s2 ); return ret; }
inline wxUString operator+(const wxUString &s1, const char *s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, const wxString &s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, const wxCStrData *s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, const wxChar16* s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, const wxChar32 *s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, const wxCharBuffer &s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, const wxU16CharBuffer &s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, const wxU32CharBuffer &s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, char s2)
{ return s1 + wxUString(s2); }
inline wxUString operator+(const wxUString &s1, wxChar32 s2)
{ wxUString ret( s1 ); ret.append( s2 ); return ret; }
inline wxUString operator+(const wxUString &s1, wxChar16 s2)
{ wxUString ret( s1 ); ret.append( (wxChar32) s2 ); return ret; }
inline wxUString operator+(const wxUString &s1, wxUniChar s2)
{ wxUString ret( s1 ); ret.append( (wxChar32) s2.GetValue() ); return ret; }
inline wxUString operator+(const wxUString &s1, wxUniCharRef s2)
{ wxUString ret( s1 ); ret.append( (wxChar32) s2.GetValue() ); return ret; }
inline wxUString operator+(const char *s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(const wxString &s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(const wxCStrData *s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(const wxChar16* s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(const wxChar32 *s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(const wxCharBuffer &s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(const wxU16CharBuffer &s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(const wxU32CharBuffer &s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(char s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(wxChar32 s1, const wxUString &s2 )
{ return wxUString(s1) + s2; }
inline wxUString operator+(wxChar16 s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(wxUniChar s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline wxUString operator+(wxUniCharRef s1, const wxUString &s2)
{ return wxUString(s1) + s2; }
inline bool operator==(const wxUString& s1, const wxUString& s2)
{ return s1.compare( s2 ) == 0; }
inline bool operator!=(const wxUString& s1, const wxUString& s2)
{ return s1.compare( s2 ) != 0; }
// Returns true if s1 orders before s2 according to basic_string::compare().
// The comparison has no side effects (a stray debugging wxPrintf() call used
// to write "test" to stdout on every invocation).
inline bool operator< (const wxUString& s1, const wxUString& s2)
{ return s1.compare( s2 ) < 0; }
inline bool operator> (const wxUString& s1, const wxUString& s2)
{ return s1.compare( s2 ) > 0; }
inline bool operator<=(const wxUString& s1, const wxUString& s2)
{ return s1.compare( s2 ) <= 0; }
inline bool operator>=(const wxUString& s1, const wxUString& s2)
{ return s1.compare( s2 ) >= 0; }
#define wxUSTRING_COMP_OPERATORS( T ) \
inline bool operator==(const wxUString& s1, T s2) \
{ return s1.compare( wxUString(s2) ) == 0; } \
inline bool operator!=(const wxUString& s1, T s2) \
{ return s1.compare( wxUString(s2) ) != 0; } \
inline bool operator< (const wxUString& s1, T s2) \
{ return s1.compare( wxUString(s2) ) < 0; } \
inline bool operator> (const wxUString& s1, T s2) \
{ return s1.compare( wxUString(s2) ) > 0; } \
inline bool operator<=(const wxUString& s1, T s2) \
{ return s1.compare( wxUString(s2) ) <= 0; } \
inline bool operator>=(const wxUString& s1, T s2) \
{ return s1.compare( wxUString(s2) ) >= 0; } \
\
inline bool operator==(T s2, const wxUString& s1) \
{ return s1.compare( wxUString(s2) ) == 0; } \
inline bool operator!=(T s2, const wxUString& s1) \
{ return s1.compare( wxUString(s2) ) != 0; } \
inline bool operator< (T s2, const wxUString& s1) \
{ return s1.compare( wxUString(s2) ) > 0; } \
inline bool operator> (T s2, const wxUString& s1) \
{ return s1.compare( wxUString(s2) ) < 0; } \
inline bool operator<=(T s2, const wxUString& s1) \
{ return s1.compare( wxUString(s2) ) >= 0; } \
inline bool operator>=(T s2, const wxUString& s1) \
{ return s1.compare( wxUString(s2) ) <= 0; }
wxUSTRING_COMP_OPERATORS( const wxString & )
wxUSTRING_COMP_OPERATORS( const char * )
wxUSTRING_COMP_OPERATORS( const wxChar16 * )
wxUSTRING_COMP_OPERATORS( const wxChar32 * )
wxUSTRING_COMP_OPERATORS( const wxCharBuffer & )
wxUSTRING_COMP_OPERATORS( const wxU16CharBuffer & )
wxUSTRING_COMP_OPERATORS( const wxU32CharBuffer & )
wxUSTRING_COMP_OPERATORS( const wxCStrData * )
#endif
// _WX_USTRING_H_BASE_

View File

@@ -82,6 +82,9 @@ public:
and Linux, too, you can specify this on the command line with the
@c configure @c --disable-utf8 switch.
If you need a Unicode string class with O(1) access on all platforms
you should consider using wxUString.
Since iterating over a wxString by index can become inefficient in UTF-8
mode and iterators should be used instead of index based access:
@@ -268,7 +271,7 @@ public:
::Objects, ::wxEmptyString,
@see @ref overview_string "wxString overview", @ref overview_unicode
"Unicode overview"
"Unicode overview", wxUString
*/
class wxString
{

289
interface/wx/ustring.h Normal file
View File

@@ -0,0 +1,289 @@
/////////////////////////////////////////////////////////////////////////////
// Name: wx/ustring.h
// Purpose: interface of wxUString
// Author: Robert Roebling
// Copyright: (c) Robert Roebling
// RCS-ID: $Id: tab.h 37400 2006-02-09 00:28:34Z VZ $
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
/**
    @class wxUString

    wxUString is a class representing a Unicode character string where
    each character is stored using a 32-bit value. This is different from
    wxString which may store a character either as a UTF-8 or as a UTF-16
    sequence and different from @c std::string which stores a string
    as a sequence of simple 8-bit characters and also different from
    @c std::wstring which stores the string differently depending on
    the definition of wchar_t.

    The main purpose of wxUString is to give users a Unicode string
    class that has O(1) access to its content, to be identical on all
    platforms and to be easily convertible to wxString as well as other
    ways to store strings (C string literals, wide character
    string literals, character buffer, etc) by providing many overloads
    and built-in conversions to and from the various formats.

    wxUString derives from @c std::basic_string<wxChar32> and therefore
    offers the complete API of @c std::string.

    @library{wxbase}
    @category{data}

    @see wxString, @ref overview_string "wxString overview", @ref overview_unicode
    "Unicode overview"
*/
class WXDLLIMPEXP_BASE wxUString: public std::basic_string<wxChar32>
{
public:
    /**
        Default constructor.
    */
    wxUString();
    /**
        Copy constructor.
    */
    wxUString( const wxUString &str );
    /**
        Constructs a string from a 32-bit string literal.
    */
    wxUString( const wxChar32 *str );
    /**
        Constructs a string from a 32-bit string buffer.
    */
    wxUString( const wxU32CharBuffer &buf );
    /**
        Constructs a string from a C string literal using wxConvLibc to
        convert it to Unicode.
    */
    wxUString( const char *str );
    /**
        Constructs a string from a C string buffer using wxConvLibc to
        convert it to Unicode.
    */
    wxUString( const wxCharBuffer &buf );
    /**
        Constructs a string from a C string literal using @a conv to
        convert it to Unicode.
    */
    wxUString( const char *str, const wxMBConv &conv );
    /**
        Constructs a string from a C string buffer using @a conv to
        convert it to Unicode.
    */
    wxUString( const wxCharBuffer &buf, const wxMBConv &conv );
    /**
        Constructs a string from a UTF-16 string literal.
    */
    wxUString( const wxChar16 *str );
    /**
        Constructs a string from a UTF-16 string buffer.
    */
    wxUString( const wxU16CharBuffer &buf );
    /**
        Constructs a string from a wxString.
    */
    wxUString( const wxString &str );
    /**
        Constructs a string from a single @c char, using wxConvLibc to
        convert it to Unicode.
    */
    wxUString( char ch );
    /**
        Constructs a string from a UTF-16 character.
    */
    wxUString( wxChar16 ch );
    /**
        Constructs a string from a 32-bit Unicode character.
    */
    wxUString( wxChar32 ch );
    /**
        Constructs a string from a wxUniChar (returned by wxString's access
        operator).
    */
    wxUString( wxUniChar ch );
    /**
        Constructs a string from a wxUniCharRef (returned by wxString's
        access operator).
    */
    wxUString( wxUniCharRef ch );
    /**
        Constructs a string from @a n characters @a ch.
    */
    wxUString( size_type n, char ch );
    /**
        Constructs a string from @a n characters @a ch.
    */
    wxUString( size_type n, wxChar16 ch );
    /**
        Constructs a string from @a n characters @a ch.
    */
    wxUString( size_type n, wxChar32 ch );
    /**
        Constructs a string from @a n characters @a ch.
    */
    wxUString( size_type n, wxUniChar ch );
    /**
        Constructs a string from @a n characters @a ch.
    */
    wxUString( size_type n, wxUniCharRef ch );
    /**
        Static construction of a wxUString from a 7-bit ASCII string.
    */
    static wxUString FromAscii( const char *str, size_type n );
    /**
        Static construction of a wxUString from a 7-bit ASCII string.
    */
    static wxUString FromAscii( const char *str );
    /**
        Static construction of a wxUString from a UTF-8 encoded string.
    */
    static wxUString FromUTF8( const char *str, size_type n );
    /**
        Static construction of a wxUString from a UTF-8 encoded string.
    */
    static wxUString FromUTF8( const char *str );
    /**
        Static construction of a wxUString from a UTF-16 encoded string.
    */
    static wxUString FromUTF16( const wxChar16 *str, size_type n );
    /**
        Static construction of a wxUString from a UTF-16 encoded string.
    */
    static wxUString FromUTF16( const wxChar16 *str );
    /**
        Assignment from a 7-bit ASCII string literal.
    */
    wxUString &assignFromAscii( const char *str );
    /**
        Assignment from a 7-bit ASCII string literal.
    */
    wxUString &assignFromAscii( const char *str, size_type n );
    /**
        Assignment from a UTF-8 string literal.
    */
    wxUString &assignFromUTF8( const char *str );
    /**
        Assignment from a UTF-8 string literal.
    */
    wxUString &assignFromUTF8( const char *str, size_type n );
    /**
        Assignment from a UTF-16 string literal.
    */
    wxUString &assignFromUTF16( const wxChar16* str );
    /**
        Assignment from a UTF-16 string literal.
    */
    wxUString &assignFromUTF16( const wxChar16* str, size_type n );
    /**
        Assignment from a C string literal using wxConvLibc.
    */
    wxUString &assignFromCString( const char* str );
    /**
        Assignment from a C string literal using @a conv.
    */
    wxUString &assignFromCString( const char* str, const wxMBConv &conv );
    /**
        Conversion to a UTF-8 string.
    */
    wxCharBuffer utf8_str() const;
    /**
        Conversion to a UTF-16 string.
    */
    wxU16CharBuffer utf16_str() const;
    /**
        Conversion to a wide character string (either UTF-16
        or UCS-4, depending on the size of wchar_t).
    */
    wxWCharBuffer wc_str() const;
    /**
        Implicit conversion to wxString.
    */
    operator wxString() const;
    /**
        wxUString assignment. wxUString additionally provides overloads for
        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
        single and repeated characters etc.
    */
    wxUString &assign( const wxUString &str );
    /**
        Appending. wxUString additionally provides overloads for
        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
        single and repeated characters etc.
    */
    wxUString &append( const wxUString &s );
    /**
        Insertion. wxUString additionally provides overloads for
        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
        single characters etc.
    */
    wxUString &insert( size_type pos, const wxUString &s );
    /**
        Assignment operator. wxUString additionally provides overloads for
        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
        single characters etc.
    */
    inline wxUString& operator=(const wxUString& s);
    /**
        Concatenation operator. wxUString additionally provides overloads for
        wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
        single characters etc.
    */
    inline wxUString& operator+=(const wxUString& s);
};
/**
    Concatenation operator. wxUString additionally provides overloads for
    wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
    single characters etc.
*/
inline wxUString operator+(const wxUString &s1, const wxUString &s2);
/**
    Equality operator. wxUString additionally provides overloads for
    wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
    single characters etc.
*/
inline bool operator==(const wxUString& s1, const wxUString& s2);
/**
    Inequality operator. wxUString additionally provides overloads for
    wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
    single characters etc.
*/
inline bool operator!=(const wxUString& s1, const wxUString& s2);
/**
    Less-than comparison operator. wxUString additionally provides overloads
    for wxString, C string, UTF-16 strings, 32-bit strings, char buffers,
    single characters etc.
*/
inline bool operator< (const wxUString& s1, const wxUString& s2);
/**
    Greater-than comparison operator. wxUString additionally provides
    overloads for wxString, C string, UTF-16 strings, 32-bit strings, char
    buffers, single characters etc.
*/
inline bool operator> (const wxUString& s1, const wxUString& s2);
/**
    Less-than-or-equal comparison operator. wxUString additionally provides
    overloads for wxString, C string, UTF-16 strings, 32-bit strings, char
    buffers, single characters etc.
*/
inline bool operator<=(const wxUString& s1, const wxUString& s2);
/**
    Greater-than-or-equal comparison operator. wxUString additionally
    provides overloads for wxString, C string, UTF-16 strings, 32-bit
    strings, char buffers, single characters etc.
*/
inline bool operator>=(const wxUString& s1, const wxUString& s2);
#endif
// _WX_USTRING_H_BASE_

548
src/common/ustring.cpp Normal file
View File

@@ -0,0 +1,548 @@
/////////////////////////////////////////////////////////////////////////////
// Name: src/common/ustring.cpp
// Purpose: wxUString class
// Author: Robert Roebling
// Created: 2008-07-25
// RCS-ID: $Id:$
// Copyright: (c) 2008 Robert Roebling
// Licence: wxWindows licence
///////////////////////////////////////////////////////////////////////////////
// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#ifdef __BORLANDC__
#pragma hdrstop
#endif
#ifndef WX_PRECOMP
#include "wx/strconv.h" // wxConvLibc
#include "wx/log.h"
#endif
#include "wx/ustring.h"
#include "wx/unichar.h"
#include "wx/string.h"
// Replaces the contents of this string with the characters of the
// NUL-terminated string @a str, widening each char to one wxChar32 without
// any encoding conversion. Returns a reference to this string.
wxUString &wxUString::assignFromAscii( const char *str )
{
    const size_type count = wxStrlen( str );

    // Scratch buffer with room for one 32-bit value per input char.
    wxU32CharBuffer widened( count );
    wxChar32 *dst = widened.data();

    for ( size_type left = count; left > 0; --left )
        *dst++ = *str++;

    return assign( widened );
}
// Replaces the contents of this string with at most @a n characters of
// @a str, widening each char to one wxChar32 without any encoding
// conversion. Copying stops early at the first NUL in @a str. A NULL
// @a str yields an empty string. Returns a reference to this string.
wxUString &wxUString::assignFromAscii( const char *str, size_type n )
{
    if (!str)
        return assign( wxUString() );

    // Effective length: n, or less if the input terminates earlier.
    size_type len = 0;
    const char *s = str;
    while (len < n && *s)
    {
        len++;
        s++;
    }

    wxU32CharBuffer buffer( len );
    wxChar32 *ptr = buffer.data();

    size_type i;
    for (i = 0; i < len; i++)
    {
        *ptr = *str;
        ptr++;
        str++;
    }

    // Store the converted data; returning *this without this assignment
    // would discard the buffer and leave the string unchanged.
    return assign( buffer );
}
// ----------------------------------------------------------------------------
// UTF-8
// ----------------------------------------------------------------------------
static const wxUint32 utf8_max[]=
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
// this table gives the length of the UTF-8 encoding from its first character:
const unsigned char tableUtf8Lengths[256] = {
// single-byte sequences (ASCII):
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
// these are invalid:
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80..8F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90..9F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A0..AF
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B0..BF
0, 0, // C0,C1
// two-byte sequences:
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
// three-byte sequences:
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
// four-byte sequences:
4, 4, 4, 4, 4, // F0..F4
// these are invalid again (5- or 6-byte
// sequences and sequences for code points
// above U+10FFFF, as restricted by RFC 3629):
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F5..FF
};
// Replaces the contents of this string with the code points decoded from
// the NUL-terminated UTF-8 string @a str.
//
// A NULL pointer or malformed input (a byte that cannot start a sequence,
// or a missing/invalid continuation byte) results in an empty string.
// Returns a reference to this string.
wxUString &wxUString::assignFromUTF8( const char *str )
{
    if (!str)
        return assign( wxUString() );

    // First pass: validate the sequence structure and count code points so
    // that the output buffer can be allocated in one go.
    size_type ucs4_len = 0;
    const char *p = str;
    while (*p)
    {
        unsigned char c = *p;
        size_type len = tableUtf8Lengths[c];
        if (!len)
            return assign( wxUString() ); // don't try to convert invalid UTF-8

        // Every byte after the lead byte must look like 10xxxxxx. Checking
        // them one at a time here matters: a truncated sequence ends in the
        // terminating NUL, which fails the test, so we bail out instead of
        // letting "p += len" jump past the end of the input.
        for ( size_type i = 1; i < len; i++ )
        {
            const unsigned char cont = p[i];
            if ( (cont & 0xC0) != 0x80 )
                return assign( wxUString() ); // don't try to convert invalid UTF-8
        }

        ucs4_len++;
        p += len;
    }

    wxU32CharBuffer buffer( ucs4_len );
    wxChar32 *out = buffer.data();

    // Second pass: decode each sequence into one 32-bit value.
    p = str;
    while (*p)
    {
        unsigned char c = *p;
        if (c < 0x80)
        {
            *out = c;
            p++;
        }
        else
        {
            size_type len = tableUtf8Lengths[c]; // len == 0 is caught above

            //   Char. number range   |        UTF-8 octet sequence
            //      (hexadecimal)     |              (binary)
            //  ----------------------+----------------------------------------
            //  0000 0000 - 0000 007F | 0xxxxxxx
            //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
            //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
            //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            //
            //  Code point value is stored in bits marked with 'x',
            //  lowest-order bit of the value on the right side in the diagram
            //  above.                                         (from RFC 3629)

            // mask to extract lead byte's value ('x' bits above), by sequence
            // length:
            static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };

            // mask and value of lead byte's most significant bits, by length:
            static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
            static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };

            len--; // it's more convenient to work with 0-based length here

            // The lead byte must carry the marker bits matching its length.
            // Reject the input outright on a mismatch: merely leaving the
            // loop would assign a partially written buffer.
            if ( (c & leadMarkerMask[len]) != leadMarkerVal[len] )
                return assign( wxUString() );

            // extract the lead byte's value bits:
            wxChar32 code = c & leadValueMask[len];

            // all remaining bytes, if any, are handled in the same way
            // regardless of sequence's length:
            for ( ; len; --len )
            {
                c = *++p;
                if ( (c & 0xC0) != 0x80 )
                    return assign( wxUString() ); // don't try to convert invalid UTF-8

                code <<= 6;
                code |= c & 0x3F;
            }

            *out = code;
            p++;
        }
        out++;
    }

    return assign( buffer.data() );
}
// Assigns the UTF-8 string str to this string, converting it to UCS-4 and
// looking at no more than its first n bytes.
//
// Conversion stops at the first NUL byte, after n bytes or before a sequence
// which would extend beyond the first n bytes, whichever comes first; bytes
// at offset n and beyond are never read.
//
// A NULL str, a byte which can't start a UTF-8 sequence or a sequence whose
// continuation bytes are missing or malformed all result in an empty string:
// no attempt is made to convert invalid UTF-8 partially.
//
// NOTE(review): overlong 3 and 4 byte forms and encoded surrogates are not
// rejected here, only the checks described above are done.
//
// Returns a reference to this string.
wxUString &wxUString::assignFromUTF8( const char *str, size_type n )
{
    if (!str)
        return assign( wxUString() );

    // First pass: validate the input and count the code points in it so that
    // the output buffer can be allocated in one go.
    size_type ucs4_len = 0;
    size_type utf8_pos = 0;
    const char *p = str;

    // test the position before dereferencing p so that str[n] is not read
    while (utf8_pos < n && *p)
    {
        unsigned char c = *p;
        size_type len = tableUtf8Lengths[c];
        if (!len)
            return assign( wxUString() ); // don't try to convert invalid UTF-8
        if (utf8_pos + len > n)
            break; // the sequence doesn't fit into the first n bytes

        // Check the continuation bytes before stepping over them: blindly
        // advancing by len would jump over a terminating NUL (and read
        // beyond the end of the string) if the last sequence is truncated.
        for ( size_type i = 1; i < len; i++ )
        {
            if ( ((unsigned char)p[i] & 0xC0) != 0x80 )
                return assign( wxUString() ); // don't try to convert invalid UTF-8
        }

        utf8_pos += len;
        ucs4_len ++;
        p += len;
    }

    wxU32CharBuffer buffer( ucs4_len );
    wxChar32 *out = buffer.data();

    // Second pass: decode exactly the ucs4_len sequences validated above.
    //
    //   Char. number range   |        UTF-8 octet sequence
    //      (hexadecimal)     |              (binary)
    //  ----------------------+----------------------------------------
    //  0000 0000 - 0000 007F | 0xxxxxxx
    //  0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
    //  0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
    //  0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    //
    //  Code point value is stored in bits marked with 'x',
    //  lowest-order bit of the value on the right side in the diagram
    //  above.                                         (from RFC 3629)

    // mask to extract lead byte's value ('x' bits above), by sequence
    // length (0-based):
    static const unsigned char leadValueMask[] = { 0x7F, 0x1F, 0x0F, 0x07 };

    p = str;
    for ( size_type k = 0; k < ucs4_len; k++ )
    {
        unsigned char c = *p++;

        // number of continuation bytes following the lead byte; the length
        // can't be 0 here as this was checked in the first pass and the
        // entries of tableUtf8Lengths already guarantee that the lead byte
        // has the marker bits corresponding to its length
        size_type extra = tableUtf8Lengths[c] - 1;

        wxChar32 code = c & leadValueMask[extra];

        // all remaining bytes, if any, are handled in the same way
        // regardless of sequence's length:
        for ( ; extra; --extra )
        {
            c = *p++;
            code <<= 6;
            code |= c & 0x3F;
        }

        *out++ = code;
    }

    // out is at index ucs4_len now, i.e. at the terminator position
    *out = 0;

    return assign( buffer.data() );
}
// Assigns the UTF-16 string str to this string, converting it to UCS-4 and
// looking at no more than its first n 16-bit units.
//
// Conversion stops at the first NUL unit, after n units or before a
// surrogate pair which would extend beyond the first n units, whichever comes
// first; units at offset n and beyond are never read.
//
// A NULL str or invalid UTF-16 (a high surrogate not followed by a low one
// or a low surrogate not preceded by a high one) results in an empty string.
//
// Returns a reference to this string.
wxUString &wxUString::assignFromUTF16( const wxChar16* str, size_type n )
{
    if (!str)
        return assign( wxUString() );

    // First pass: validate the input and count the code points in it so that
    // the output buffer can be allocated in one go.
    size_type ucs4_len = 0;
    size_type utf16_pos = 0;
    const wxChar16 *p = str;

    // test the position before dereferencing p so that str[n] is not read
    while (utf16_pos < n && *p)
    {
        size_type len;
        if ((*p < 0xd800) || (*p > 0xdfff))
        {
            len = 1;
        }
        else if (*p > 0xdbff)
        {
            // a low surrogate can't start a pair: decoding it as if it were
            // a high one would yield values of 0x110000 and above
            return assign( wxUString() ); // don't try to convert invalid UTF-16
        }
        else
        {
            // check that the pair fits before looking at its second half
            if (utf16_pos + 2 > n)
                break;

            if ((p[1] < 0xdc00) || (p[1] > 0xdfff))
                return assign( wxUString() ); // don't try to convert invalid UTF-16

            len = 2;
        }

        ucs4_len++;
        p += len;
        utf16_pos += len;
    }

    wxU32CharBuffer buffer( ucs4_len );
    wxChar32 *out = buffer.data();

    // Second pass: decode exactly the ucs4_len code points validated above.
    p = str;
    for ( size_type i = 0; i < ucs4_len; i++ )
    {
        if ((*p < 0xd800) || (*p > 0xdfff))
        {
            *out = *p;
            p++;
        }
        else
        {
            // 0xd7c0 is 0xd800 - (0x10000 >> 10), i.e. this both removes the
            // high surrogate offset and adds 0x10000 to the result
            *out = ((p[0] - 0xd7c0) << 10) + (p[1] - 0xdc00);
            p += 2;
        }

        out++;
    }

    return assign( buffer.data() );
}
// Assigns the NUL-terminated UTF-16 string str to this string, converting it
// to UCS-4.
//
// A NULL str or invalid UTF-16 (a high surrogate not followed by a low one
// or a low surrogate not preceded by a high one) results in an empty string.
//
// Returns a reference to this string.
wxUString &wxUString::assignFromUTF16( const wxChar16* str )
{
    if (!str)
        return assign( wxUString() );

    // First pass: validate the input and count the code points in it so that
    // the output buffer can be allocated in one go.
    size_type ucs4_len = 0;
    const wxChar16 *p = str;
    while (*p)
    {
        size_type len;
        if ((*p < 0xd800) || (*p > 0xdfff))
        {
            len = 1;
        }
        else if (*p > 0xdbff)
        {
            // a low surrogate can't start a pair: decoding it as if it were
            // a high one would yield values of 0x110000 and above
            return assign( wxUString() ); // don't try to convert invalid UTF-16
        }
        else if ((p[1] < 0xdc00) || (p[1] > 0xdfff))
        {
            // this also catches a high surrogate at the very end of the
            // string as p[1] is the terminating NUL then
            return assign( wxUString() ); // don't try to convert invalid UTF-16
        }
        else
        {
            len = 2;
        }

        ucs4_len++;
        p += len;
    }

    wxU32CharBuffer buffer( ucs4_len );
    wxChar32 *out = buffer.data();

    // Second pass: decode, the input is known to be valid by now.
    p = str;
    while (*p)
    {
        if ((*p < 0xd800) || (*p > 0xdfff))
        {
            *out = *p;
            p++;
        }
        else
        {
            // 0xd7c0 is 0xd800 - (0x10000 >> 10), i.e. this both removes the
            // high surrogate offset and adds 0x10000 to the result
            *out = ((p[0] - 0xd7c0) << 10) + (p[1] - 0xdc00);
            p += 2;
        }

        out++;
    }

    return assign( buffer.data() );
}
// Assigns the multibyte string str to this string after converting it to
// wide characters with wxConvLibc; a NULL str gives an empty string.
//
// Returns a reference to this string.
wxUString &wxUString::assignFromCString( const char* str )
{
    if (str)
    {
        // go through the wchar_t representation, assign() takes it from there
        wxWCharBuffer wide = wxConvLibc.cMB2WC( str );
        return assign( wide );
    }

    return assign( wxUString() );
}
// Assigns the multibyte string str to this string after converting it to
// wide characters with the given conversion object; a NULL str gives an
// empty string.
//
// Returns a reference to this string.
wxUString &wxUString::assignFromCString( const char* str, const wxMBConv &conv )
{
    if (str)
    {
        // go through the wchar_t representation, assign() takes it from there
        wxWCharBuffer wide = conv.cMB2WC( str );
        return assign( wide );
    }

    return assign( wxUString() );
}
wxCharBuffer wxUString::utf8_str() const
{
size_type utf8_length = 0;
const wxChar32 *ptr = data();
while (*ptr)
{
wxChar32 code = *ptr;
ptr++;
if ( code <= 0x7F )
{
utf8_length++;
}
else if ( code <= 0x07FF )
{
utf8_length += 2;
}
else if ( code < 0xFFFF )
{
utf8_length += 3;
}
else if ( code <= 0x10FFFF )
{
utf8_length += 4;
}
else
{
// invalid range, skip
}
}
wxCharBuffer result( utf8_length );
char *out = result.data();
ptr = data();
while (*ptr)
{
wxChar32 code = *ptr;
ptr++;
if ( code <= 0x7F )
{
out[0] = (char)code;
out++;
}
else if ( code <= 0x07FF )
{
out[1] = 0x80 | (code & 0x3F); code >>= 6;
out[0] = 0xC0 | code;
out += 2;
}
else if ( code < 0xFFFF )
{
out[2] = 0x80 | (code & 0x3F); code >>= 6;
out[1] = 0x80 | (code & 0x3F); code >>= 6;
out[0] = 0xE0 | code;
out += 3;
}
else if ( code <= 0x10FFFF )
{
out[3] = 0x80 | (code & 0x3F); code >>= 6;
out[2] = 0x80 | (code & 0x3F); code >>= 6;
out[1] = 0x80 | (code & 0x3F); code >>= 6;
out[0] = 0xF0 | code;
out += 4;
}
else
{
// invalid range, skip
}
}
wxPrintf( "utf8_str %s len %d\n", result, wxStrlen( result.data() ) );
wxPrintf( "utf8_str %s len %d\n", result, wxStrlen( result.data() ) );
return result;
}
// Returns the contents of this string converted to UTF-16.
//
// Values below 0x10000 are copied as a single 16-bit unit, values up to
// U+10FFFF are encoded as a surrogate pair. Values above U+10FFFF can't be
// represented in UTF-16 and are silently skipped, just as utf8_str() does.
//
// NOTE(review): surrogate values (D800..DFFF) present in the string are not
// filtered out and are copied through unchanged.
wxU16CharBuffer wxUString::utf16_str() const
{
    // First pass: compute the length of the result.
    size_type utf16_length = 0;
    const wxChar32 *ptr = data();
    while (*ptr)
    {
        // wxChar32 may be a (possibly signed) wchar_t, work with an unsigned
        // value so that out of range values are reliably detected as such
        const wxUint32 code = (wxUint32)*ptr;
        ptr++;

        if (code < 0x10000)
            utf16_length++;
        else if (code <= 0x10FFFF)
            utf16_length += 2;
        //else: invalid range, skip
    }

    // NOTE(review): as everywhere else in this file, the buffer is relied
    // upon to provide room for the terminating NUL and to set it
    wxU16CharBuffer result( utf16_length );
    wxChar16 *out = result.data();

    // Second pass: do the encoding.
    ptr = data();
    while (*ptr)
    {
        const wxUint32 code = (wxUint32)*ptr;
        ptr++;

        if (code < 0x10000)
        {
            out[0] = (wxChar16)code;
            out++;
        }
        else if (code <= 0x10FFFF)
        {
            // high surrogate carries the upper 10 bits of (code - 0x10000),
            // low surrogate the lower 10 ones
            out[0] = (wxChar16)((code - 0x10000) / 0x400 + 0xd800);
            out[1] = (wxChar16)((code - 0x10000) % 0x400 + 0xdc00);
            out += 2;
        }
        //else: invalid range, skip (a pair built from such a value would
        //      not consist of valid surrogates)
    }

    return result;
}

View File

@@ -796,6 +796,42 @@ WXDLLIMPEXP_BASE wchar_t * wxCRT_StrdupW(const wchar_t *pwz)
}
#endif // wxCRT_StrdupW
#ifndef wxWCHAR_T_IS_WXCHAR16
// Returns the number of 16-bit units in s before the terminating 0 unit;
// a null pointer is treated as an empty string.
WXDLLIMPEXP_BASE size_t wxStrlen(const wxChar16 *s )
{
    size_t count = 0;

    if ( s )
    {
        // walk the string up to, but not including, the terminator
        for ( const wxChar16 *cur = s; *cur != 0; ++cur )
            ++count;
    }

    return count;
}
// Returns a copy of the 0-terminated string s allocated with malloc(), the
// caller is responsible for releasing it with free().
//
// Returns NULL if s is NULL or if the memory couldn't be allocated.
WXDLLIMPEXP_BASE wxChar16* wxStrdup(const wxChar16* s)
{
    // wxStrlen() accepts NULL but memcpy() doesn't, so don't let it get there
    if ( !s )
        return NULL;

    // size in bytes, including the terminator
    const size_t size = (wxStrlen(s) + 1) * sizeof(wxChar16);

    wxChar16 *ret = (wxChar16*) malloc(size);
    if ( ret )
        memcpy(ret, s, size);

    return ret;
}
#endif
#ifndef wxWCHAR_T_IS_WXCHAR32
// Returns the number of 32-bit units in s before the terminating 0 unit;
// a null pointer is treated as an empty string.
WXDLLIMPEXP_BASE size_t wxStrlen(const wxChar32 *s )
{
    size_t count = 0;

    if ( s )
    {
        // walk the string up to, but not including, the terminator
        for ( const wxChar32 *cur = s; *cur != 0; ++cur )
            ++count;
    }

    return count;
}
// Returns a copy of the 0-terminated string s allocated with malloc(), the
// caller is responsible for releasing it with free().
//
// Returns NULL if s is NULL or if the memory couldn't be allocated.
WXDLLIMPEXP_BASE wxChar32* wxStrdup(const wxChar32* s)
{
    // wxStrlen() accepts NULL but memcpy() doesn't, so don't let it get there
    if ( !s )
        return NULL;

    // size in bytes, including the terminator
    const size_t size = (wxStrlen(s) + 1) * sizeof(wxChar32);

    wxChar32 *ret = (wxChar32*) malloc(size);
    if ( ret )
        memcpy(ret, s, size);

    return ret;
}
#endif
#ifndef wxCRT_StricmpA
WXDLLIMPEXP_BASE int wxCRT_StricmpA(const char *psz1, const char *psz2)
{