Added code for optimized handling of UTF-8 locales: some string operations are more efficient under such locales, and support for other locales can be compiled out completely if the target system is known to use only UTF-8 locales

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@45782 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík
2007-05-03 11:05:04 +00:00
parent d1f6e2cfe2
commit 111d99489d
10 changed files with 329 additions and 110 deletions

326
configure vendored
View File

@@ -1001,6 +1001,7 @@ Optional Features:
--enable-mimetype use wxMimeTypesManager --enable-mimetype use wxMimeTypesManager
--enable-mslu use MS Layer for Unicode on Windows 9x (Win32 only) --enable-mslu use MS Layer for Unicode on Windows 9x (Win32 only)
--enable-utf8 use UTF-8 representation for strings (Unix only) --enable-utf8 use UTF-8 representation for strings (Unix only)
--enable-utf8only only support UTF-8 locales in UTF-8 build (Unix only)
--enable-snglinst use wxSingleInstanceChecker class --enable-snglinst use wxSingleInstanceChecker class
--enable-std_iostreams use standard C++ stream classes --enable-std_iostreams use standard C++ stream classes
--enable-std_string use standard C++ string classes --enable-std_string use standard C++ string classes
@@ -2389,6 +2390,7 @@ if test $DEBUG_CONFIGURE = 1; then
DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE=no
DEFAULT_wxUSE_UNICODE_MSLU=no DEFAULT_wxUSE_UNICODE_MSLU=no
DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_UNICODE_UTF8=no
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_WCSRTOMBS=no
DEFAULT_wxUSE_PALETTE=no DEFAULT_wxUSE_PALETTE=no
@@ -2616,6 +2618,7 @@ else
DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE=no
DEFAULT_wxUSE_UNICODE_MSLU=yes DEFAULT_wxUSE_UNICODE_MSLU=yes
DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_UNICODE_UTF8=no
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_WCSRTOMBS=no
DEFAULT_wxUSE_PALETTE=yes DEFAULT_wxUSE_PALETTE=yes
@@ -6066,6 +6069,47 @@ echo "${ECHO_T}no" >&6
fi fi
enablestring=
echo "$as_me:$LINENO: checking for --${enablestring:-enable}-utf8only" >&5
echo $ECHO_N "checking for --${enablestring:-enable}-utf8only... $ECHO_C" >&6
no_cache=0
# Check whether --enable-utf8only or --disable-utf8only was given.
if test "${enable_utf8only+set}" = set; then
enableval="$enable_utf8only"
if test "$enableval" = yes; then
ac_cv_use_utf8only='wxUSE_UNICODE_UTF8_LOCALE=yes'
else
ac_cv_use_utf8only='wxUSE_UNICODE_UTF8_LOCALE=no'
fi
else
LINE=`grep "wxUSE_UNICODE_UTF8_LOCALE" ${wx_arg_cache_file}`
if test "x$LINE" != x ; then
eval "DEFAULT_$LINE"
else
no_cache=1
fi
ac_cv_use_utf8only='wxUSE_UNICODE_UTF8_LOCALE='$DEFAULT_wxUSE_UNICODE_UTF8_LOCALE
fi;
eval "$ac_cv_use_utf8only"
if test "$no_cache" != 1; then
echo $ac_cv_use_utf8only >> ${wx_arg_cache_file}.tmp
fi
if test "$wxUSE_UNICODE_UTF8_LOCALE" = yes; then
echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6
else
echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6
fi
enablestring= enablestring=
echo "$as_me:$LINENO: checking for --${enablestring:-enable}-snglinst" >&5 echo "$as_me:$LINENO: checking for --${enablestring:-enable}-snglinst" >&5
echo $ECHO_N "checking for --${enablestring:-enable}-snglinst... $ECHO_C" >&6 echo $ECHO_N "checking for --${enablestring:-enable}-snglinst... $ECHO_C" >&6
@@ -39098,7 +39142,6 @@ echo $ECHO_N "checking how many arguments gethostbyname_r() takes... $ECHO_C" >&
else else
################################################################
ac_cv_func_which_gethostbyname_r=unknown ac_cv_func_which_gethostbyname_r=unknown
@@ -39340,7 +39383,6 @@ rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
fi fi
################################################################
fi fi
@@ -39498,19 +39540,103 @@ _ACEOF
fi fi
fi fi
echo "$as_me:$LINENO: checking for getservbyname_r" >&5
echo "$as_me:$LINENO: checking how many arguments getservbyname_r() takes" >&5 echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6
echo $ECHO_N "checking how many arguments getservbyname_r() takes... $ECHO_C" >&6
if test "${ac_cv_func_which_getservbyname_r+set}" = set; then if test "${ac_cv_func_which_getservbyname_r+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6 echo $ECHO_N "(cached) $ECHO_C" >&6
else else
ac_ext=cc echo "$as_me:$LINENO: checking for getservbyname_r" >&5
ac_cpp='$CXXCPP $CPPFLAGS' echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' if test "${ac_cv_func_getservbyname_r+set}" = set; then
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' echo $ECHO_N "(cached) $ECHO_C" >&6
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Define getservbyname_r to an innocuous variant, in case <limits.h> declares getservbyname_r.
For example, HP-UX 11i <limits.h> declares gettimeofday. */
#define getservbyname_r innocuous_getservbyname_r
/* System header to define __stub macros and hopefully few prototypes,
which can conflict with char getservbyname_r (); below.
Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
<limits.h> exists even on freestanding compilers. */
#ifdef __STDC__
# include <limits.h>
#else
# include <assert.h>
#endif
#undef getservbyname_r
/* Override any gcc2 internal prototype to avoid an error. */
#ifdef __cplusplus
extern "C"
{
#endif
/* We use char because int might match the return type of a gcc2
builtin and then its argument prototype would still apply. */
char getservbyname_r ();
/* The GNU C library defines this for functions which it implements
to always fail with ENOSYS. Some functions are actually named
something starting with __ and the normal name is an alias. */
#if defined (__stub_getservbyname_r) || defined (__stub___getservbyname_r)
choke me
#else
char (*f) () = getservbyname_r;
#endif
#ifdef __cplusplus
}
#endif
int
main ()
{
return f != getservbyname_r;
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext conftest$ac_exeext
if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
(eval $ac_link) 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } &&
{ ac_try='test -z "$ac_c_werror_flag"
|| test ! -s conftest.err'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; } &&
{ ac_try='test -s conftest$ac_exeext'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
ac_cv_func_getservbyname_r=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_func_getservbyname_r=no
fi
rm -f conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
echo "$as_me:$LINENO: result: $ac_cv_func_getservbyname_r" >&5
echo "${ECHO_T}$ac_cv_func_getservbyname_r" >&6
if test $ac_cv_func_getservbyname_r = yes; then
cat >conftest.$ac_ext <<_ACEOF cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */ /* confdefs.h. */
@@ -39518,17 +39644,20 @@ _ACEOF
cat confdefs.h >>conftest.$ac_ext cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */ /* end confdefs.h. */
#include <netdb.h>
# include <netdb.h>
int int
main () main ()
{ {
char *name;
char *proto; char *name;
struct servent *se, *res; char *proto;
char buffer[2048]; struct servent *se;
int buflen = 2048; struct servent_data data;
(void) getservbyname_r(name, proto, se, buffer, buflen, &res) (void) getservbyname_r(name, proto, se, &data);
; ;
return 0; return 0;
@@ -39543,7 +39672,59 @@ if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
cat conftest.err >&5 cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && (exit $ac_status); } &&
{ ac_try='test -z "$ac_cxx_werror_flag" { ac_try='test -z "$ac_c_werror_flag"
|| test ! -s conftest.err'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; } &&
{ ac_try='test -s conftest.$ac_objext'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
ac_cv_func_which_getservbyname_r=four
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
# include <netdb.h>
int
main ()
{
char *name;
char *proto;
struct servent *se, *res;
char buffer[2048];
int buflen = 2048;
(void) getservbyname_r(name, proto, se, buffer, buflen, &res)
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } &&
{ ac_try='test -z "$ac_c_werror_flag"
|| test ! -s conftest.err' || test ! -s conftest.err'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5 (eval $ac_try) 2>&5
@@ -39562,23 +39743,25 @@ else
sed 's/^/| /' conftest.$ac_ext >&5 sed 's/^/| /' conftest.$ac_ext >&5
cat >conftest.$ac_ext <<_ACEOF cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */ /* confdefs.h. */
_ACEOF _ACEOF
cat confdefs.h >>conftest.$ac_ext cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */ /* end confdefs.h. */
#include <netdb.h>
# include <netdb.h>
int int
main () main ()
{ {
char *name; char *name;
char *proto; char *proto;
struct servent *se; struct servent *se;
char buffer[2048]; char buffer[2048];
int buflen = 2048; int buflen = 2048;
(void) getservbyname_r(name, proto, se, buffer, buflen) (void) getservbyname_r(name, proto, se, buffer, buflen)
; ;
return 0; return 0;
@@ -39593,7 +39776,7 @@ if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
cat conftest.err >&5 cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && (exit $ac_status); } &&
{ ac_try='test -z "$ac_cxx_werror_flag" { ac_try='test -z "$ac_c_werror_flag"
|| test ! -s conftest.err' || test ! -s conftest.err'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5 (eval $ac_try) 2>&5
@@ -39611,56 +39794,12 @@ else
echo "$as_me: failed program was:" >&5 echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5 sed 's/^/| /' conftest.$ac_ext >&5
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <netdb.h>
int
main ()
{
char *name;
char *proto;
struct servent *se;
struct servent_data data;
(void) getservbyname_r(name, proto, se, &data);
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } &&
{ ac_try='test -z "$ac_cxx_werror_flag"
|| test ! -s conftest.err'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; } &&
{ ac_try='test -s conftest.$ac_objext'
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
ac_cv_func_which_getservbyname_r=four
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_func_which_getservbyname_r=no ac_cv_func_which_getservbyname_r=no
fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
fi fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
@@ -39669,16 +39808,9 @@ rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
fi fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
else
ac_cv_func_which_getservbyname_r=no
fi fi
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi fi
echo "$as_me:$LINENO: result: $ac_cv_func_which_getservbyname_r" >&5 echo "$as_me:$LINENO: result: $ac_cv_func_which_getservbyname_r" >&5
@@ -39699,6 +39831,7 @@ elif test $ac_cv_func_which_getservbyname_r = four; then
#define HAVE_FUNC_GETSERVBYNAME_R_4 1 #define HAVE_FUNC_GETSERVBYNAME_R_4 1
_ACEOF _ACEOF
fi fi
@@ -43472,6 +43605,13 @@ if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then
#define wxUSE_UNICODE_UTF8 1 #define wxUSE_UNICODE_UTF8 1
_ACEOF _ACEOF
if test "$wxUSE_UNICODE_UTF8_LOCALE" = "yes"; then
cat >>confdefs.h <<\_ACEOF
#define wxUSE_UTF8_LOCALE_ONLY 1
_ACEOF
fi
fi fi
if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then
@@ -46067,7 +46207,10 @@ echo "${ECHO_T}$bakefile_cv_prog_makeisgnu" >&6
PLATFORM_BEOS=1 PLATFORM_BEOS=1
;; ;;
* ) * )
;; { { echo "$as_me:$LINENO: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&5
echo "$as_me: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&2;}
{ (exit 1); exit 1; }; }
;;
esac esac
fi fi
@@ -48802,10 +48945,21 @@ echo "${ECHO_T}no" >&6
cppunit_major_min=`echo $cppunit_version_min | \ cppunit_major_min=`echo $cppunit_version_min | \
sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\1/'` sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\1/'`
if test "x${cppunit_major_min}" = "x" ; then
cppunit_major_min=0
fi
cppunit_minor_min=`echo $cppunit_version_min | \ cppunit_minor_min=`echo $cppunit_version_min | \
sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\2/'` sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\2/'`
if test "x${cppunit_minor_min}" = "x" ; then
cppunit_minor_min=0
fi
cppunit_micro_min=`echo $cppunit_version_min | \ cppunit_micro_min=`echo $cppunit_version_min | \
sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\3/'` sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\3/'`
if test "x${cppunit_micro_min}" = "x" ; then
cppunit_micro_min=0
fi
cppunit_version_proper=`expr \ cppunit_version_proper=`expr \
$cppunit_major_version \> $cppunit_major_min \| \ $cppunit_major_version \> $cppunit_major_min \| \

View File

@@ -578,6 +578,7 @@ if test $DEBUG_CONFIGURE = 1; then
DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE=no
DEFAULT_wxUSE_UNICODE_MSLU=no DEFAULT_wxUSE_UNICODE_MSLU=no
DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_UNICODE_UTF8=no
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_WCSRTOMBS=no
DEFAULT_wxUSE_PALETTE=no DEFAULT_wxUSE_PALETTE=no
@@ -805,6 +806,7 @@ else
DEFAULT_wxUSE_UNICODE=no DEFAULT_wxUSE_UNICODE=no
DEFAULT_wxUSE_UNICODE_MSLU=yes DEFAULT_wxUSE_UNICODE_MSLU=yes
DEFAULT_wxUSE_UNICODE_UTF8=no DEFAULT_wxUSE_UNICODE_UTF8=no
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
DEFAULT_wxUSE_WCSRTOMBS=no DEFAULT_wxUSE_WCSRTOMBS=no
DEFAULT_wxUSE_PALETTE=yes DEFAULT_wxUSE_PALETTE=yes
@@ -993,6 +995,7 @@ WX_ARG_ENABLE(mimetype, [ --enable-mimetype use wxMimeTypesManager],
WX_ARG_ENABLE(mslu, [ --enable-mslu use MS Layer for Unicode on Windows 9x (Win32 only)], wxUSE_UNICODE_MSLU) WX_ARG_ENABLE(mslu, [ --enable-mslu use MS Layer for Unicode on Windows 9x (Win32 only)], wxUSE_UNICODE_MSLU)
dnl FIXME-UTF8: make UTF8 automatic dnl FIXME-UTF8: make UTF8 automatic
WX_ARG_ENABLE(utf8, [ --enable-utf8 use UTF-8 representation for strings (Unix only)], wxUSE_UNICODE_UTF8) WX_ARG_ENABLE(utf8, [ --enable-utf8 use UTF-8 representation for strings (Unix only)], wxUSE_UNICODE_UTF8)
WX_ARG_ENABLE(utf8only, [ --enable-utf8only only support UTF-8 locales in UTF-8 build (Unix only)], wxUSE_UNICODE_UTF8_LOCALE)
WX_ARG_ENABLE(snglinst, [ --enable-snglinst use wxSingleInstanceChecker class], wxUSE_SNGLINST_CHECKER) WX_ARG_ENABLE(snglinst, [ --enable-snglinst use wxSingleInstanceChecker class], wxUSE_SNGLINST_CHECKER)
WX_ARG_ENABLE(std_iostreams, [ --enable-std_iostreams use standard C++ stream classes], wxUSE_STD_IOSTREAM) WX_ARG_ENABLE(std_iostreams, [ --enable-std_iostreams use standard C++ stream classes], wxUSE_STD_IOSTREAM)
WX_ARG_ENABLE(std_string, [ --enable-std_string use standard C++ string classes], wxUSE_STD_STRING) WX_ARG_ENABLE(std_string, [ --enable-std_string use standard C++ string classes], wxUSE_STD_STRING)
@@ -6492,6 +6495,10 @@ fi
if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then
AC_DEFINE(wxUSE_UNICODE_UTF8) AC_DEFINE(wxUSE_UNICODE_UTF8)
if test "$wxUSE_UNICODE_UTF8_LOCALE" = "yes"; then
AC_DEFINE(wxUSE_UTF8_LOCALE_ONLY)
fi
fi fi
if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then

View File

@@ -135,6 +135,12 @@ public:
// encoding // encoding
static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; } static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; }
#if wxUSE_UNICODE_UTF8
// return true if the converter's charset is UTF-8, i.e. char* strings
// decoded using this object can be directly copied to wxString's internal
// storage without converting to WC and then back to UTF-8 MB string
virtual bool IsUTF8() const { return false; }
#endif
// The old conversion functions. The existing classes currently mostly // The old conversion functions. The existing classes currently mostly
// implement these ones but we're in transition to using To/FromWChar() // implement these ones but we're in transition to using To/FromWChar()
@@ -175,6 +181,10 @@ public:
virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
virtual wxMBConv *Clone() const { return new wxMBConvLibc; } virtual wxMBConv *Clone() const { return new wxMBConvLibc; }
#if wxUSE_UNICODE_UTF8
virtual bool IsUTF8() const { return wxLocaleIsUtf8; }
#endif
}; };
#ifdef __UNIX__ #ifdef __UNIX__
@@ -244,6 +254,8 @@ public:
class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConv class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConv
{ {
public: public:
// FIXME-UTF8: split this class into multiple classes, one strict and
// the other lossy (PUA, OCTAL mappings)
enum enum
{ {
MAP_INVALID_UTF8_NOT = 0, MAP_INVALID_UTF8_NOT = 0,
@@ -257,6 +269,12 @@ public:
virtual wxMBConv *Clone() const { return new wxMBConvUTF8(m_options); } virtual wxMBConv *Clone() const { return new wxMBConvUTF8(m_options); }
#if wxUSE_UNICODE_UTF8
// NB: other mapping modes are not, strictly speaking, UTF-8, so we can't
// take the shortcut in that case
virtual bool IsUTF8() const { return m_options == MAP_INVALID_UTF8_NOT; }
#endif
private: private:
int m_options; int m_options;
}; };

View File

@@ -201,7 +201,7 @@ public:
const wchar_t* AsWChar() const; const wchar_t* AsWChar() const;
operator const wchar_t*() const { return AsWChar(); } operator const wchar_t*() const { return AsWChar(); }
#if !wxUSE_UNICODE #if !wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY
inline inline
#endif #endif
const char* AsChar() const; const char* AsChar() const;
@@ -477,9 +477,6 @@ private:
#else // wxUSE_UNICODE_UTF8 #else // wxUSE_UNICODE_UTF8
// FIXME-UTF8: return as-is without copying under UTF8 locale, return
// converted string under other locales - needs wxCharBuffer
// changes
static wxCharBuffer ImplStr(const char* str, static wxCharBuffer ImplStr(const char* str,
const wxMBConv& conv = wxConvLibc) const wxMBConv& conv = wxConvLibc)
{ return ConvertStr(str, npos, conv).data; } { return ConvertStr(str, npos, conv).data; }
@@ -931,8 +928,7 @@ public:
{ return wxStdWideString(wc_str()); } { return wxStdWideString(wc_str()); }
#endif #endif
#if !wxUSE_UNICODE && wxUSE_STL_BASED_WXSTRING #if (!wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY) && wxUSE_STL_BASED_WXSTRING
// FIXME-UTF8: do this in UTF8 build #if wxUSE_UTF8_LOCALE_ONLY, too
// wxStringImpl is std::string in the encoding we want // wxStringImpl is std::string in the encoding we want
operator const std::string&() const { return m_impl; } operator const std::string&() const { return m_impl; }
#else #else
@@ -941,8 +937,7 @@ public:
// FIXME-UTF8: broken for embedded NULs // FIXME-UTF8: broken for embedded NULs
{ return std::string(mb_str()); } { return std::string(mb_str()); }
#endif #endif
#endif // wxUSE_STL
#endif // wxUSE_STD_STRING
// first valid index position // first valid index position
const_iterator begin() const { return const_iterator(m_impl.begin()); } const_iterator begin() const { return const_iterator(m_impl.begin()); }
@@ -1161,7 +1156,13 @@ public:
// type differs because a function may either return pointer to the buffer // type differs because a function may either return pointer to the buffer
// directly or have to use intermediate buffer for translation. // directly or have to use intermediate buffer for translation.
#if wxUSE_UNICODE #if wxUSE_UNICODE
#if wxUSE_UTF8_LOCALE_ONLY
const char* mb_str() const { return wx_str(); }
const wxCharBuffer mb_str(const wxMBConv& conv) const;
#else
const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const; const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const;
#endif
const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); } const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); }
@@ -2428,7 +2429,7 @@ private:
T *m_buf; T *m_buf;
}; };
#if wxUSE_UNICODE #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
ConvertedBuffer<char> m_convertedToChar; ConvertedBuffer<char> m_convertedToChar;
#endif #endif
#if !wxUSE_UNICODE_WCHAR #if !wxUSE_UNICODE_WCHAR
@@ -2821,10 +2822,10 @@ inline const wchar_t* wxCStrData::AsWChar() const
} }
#endif // wxUSE_UNICODE_WCHAR #endif // wxUSE_UNICODE_WCHAR
#if !wxUSE_UNICODE #if !wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY
inline const char* wxCStrData::AsChar() const inline const char* wxCStrData::AsChar() const
{ {
return m_str->wx_str() + m_offset; return wxStringOperations::AddToIter(m_str->wx_str(), m_offset);
} }
#endif // !wxUSE_UNICODE #endif // !wxUSE_UNICODE

View File

@@ -65,7 +65,8 @@ struct WXDLLIMPEXP_BASE wxStringOperationsWchar
struct WXDLLIMPEXP_BASE wxStringOperationsUtf8 struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
{ {
// checks correctness of UTF-8 sequence // checks correctness of UTF-8 sequence
static bool IsValidUtf8String(const char *c); static bool IsValidUtf8String(const char *c,
size_t len = wxStringImpl::npos);
#ifdef __WXDEBUG__ #ifdef __WXDEBUG__
static bool IsValidUtf8LeadByte(unsigned char c); static bool IsValidUtf8LeadByte(unsigned char c);
#endif #endif

View File

@@ -562,6 +562,8 @@
#define wxUSE_UNICODE_UTF8 0 #define wxUSE_UNICODE_UTF8 0
#define wxUSE_UTF8_LOCALE_ONLY 0
#define wxUSE_DC_CACHEING 0 #define wxUSE_DC_CACHEING 0
#define wxUSE_GADGETS 0 #define wxUSE_GADGETS 0

View File

@@ -220,9 +220,16 @@ wxString::~wxString()
} }
#endif #endif
#if wxUSE_UNICODE #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
const char* wxCStrData::AsChar() const const char* wxCStrData::AsChar() const
{ {
#if wxUSE_UNICODE_UTF8
if ( wxLocaleIsUtf8 )
return AsInternal();
#endif
// under non-UTF8 locales, we have to convert the internal UTF-8
// representation using wxConvLibc and cache the result
wxString *str = wxConstCast(m_str, wxString); wxString *str = wxConstCast(m_str, wxString);
// convert the string: // convert the string:
@@ -244,7 +251,7 @@ const char* wxCStrData::AsChar() const
// and keep it: // and keep it:
return str->m_convertedToChar + m_offset; return str->m_convertedToChar + m_offset;
} }
#endif // wxUSE_UNICODE #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
#if !wxUSE_UNICODE_WCHAR #if !wxUSE_UNICODE_WCHAR
const wchar_t* wxCStrData::AsWChar() const const wchar_t* wxCStrData::AsWChar() const
@@ -306,14 +313,23 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength, wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
const wxMBConv& conv) const wxMBConv& conv)
{ {
// FIXME-UTF8: return as-is without copying under UTF8 locale, return
// converted string under other locales - needs wxCharBuffer
// changes
// anything to do? // anything to do?
if ( !psz || nLength == 0 ) if ( !psz || nLength == 0 )
return SubstrBufFromMB("", 0); return SubstrBufFromMB("", 0);
// if psz is already in UTF-8, we don't have to do the roundtrip to
// wchar_t* and back:
if ( conv.IsUTF8() )
{
// we need to validate the input because UTF8 iterators assume valid
// UTF-8 sequence and psz may be invalid:
if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
{
return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
}
// else: do the roundtrip through wchar_t*
}
if ( nLength == npos ) if ( nLength == npos )
nLength = wxNO_LEN; nLength = wxNO_LEN;
@@ -373,8 +389,9 @@ const wxWCharBuffer wxString::wc_str() const
const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
{ {
// FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc if ( conv.IsUTF8() )
// under UTF8 locale return wxCharBuffer::CreateNonOwned(m_impl.c_str());
// FIXME-UTF8: use wc_str() here once we have buffers with length // FIXME-UTF8: use wc_str() here once we have buffers with length
size_t wcLen; size_t wcLen;

View File

@@ -87,17 +87,26 @@ unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
// U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | // U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
// -------------------+----------+----------+----------+----------+ // -------------------+----------+----------+----------+----------+
bool wxStringOperationsUtf8::IsValidUtf8String(const char *str) bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len)
{ {
if ( !str ) if ( !str )
return true; // empty string is UTF8 string return true; // empty string is UTF8 string
const unsigned char *c = (const unsigned char*)str; const unsigned char *c = (const unsigned char*)str;
const unsigned char * const end = (len == wxStringImpl::npos) ? NULL : c + len;
for ( ; *c; ++c ) for ( ; c != end && *c; ++c )
{ {
unsigned char b = *c; unsigned char b = *c;
if ( end != NULL )
{
// if the string is not NUL-terminated, verify that enough bytes are
// left in it for the current character's encoding:
if ( c + ms_utf8IterTable[*c] > end )
return false;
}
if ( b <= 0x7F ) // 00..7F if ( b <= 0x7F ) // 00..7F
continue; continue;

View File

@@ -41,7 +41,7 @@ const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
return m_value.AsInternal(); return m_value.AsInternal();
} }
#if wxUSE_UNICODE_UTF8 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(const wxString& s) wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(const wxString& s)
: wxArgNormalizerWithBuffer<wchar_t>(s.wc_str()) : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str())
{ {
@@ -51,7 +51,7 @@ wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(const wxCStrData&
: wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf()) : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf())
{ {
} }
#endif // wxUSE_UNICODE_UTF8 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
wxString wxArgNormalizedString::GetString() const wxString wxArgNormalizedString::GetString() const
{ {

View File

@@ -41,10 +41,15 @@ wxUniChar::value_type wxUniChar::From8bit(char c)
if ( (unsigned char)c < 0x80 ) if ( (unsigned char)c < 0x80 )
return c; return c;
#if wxUSE_UTF8_LOCALE_ONLY
wxFAIL_MSG( _T("invalid UTF-8 character") );
return wxT('?'); // FIXME-UTF8: what to use as failure character?
#else
wchar_t buf[2]; wchar_t buf[2];
if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 ) if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 )
return wxT('?'); // FIXME-UTF8: what to use as failure character? return wxT('?'); // FIXME-UTF8: what to use as failure character?
return buf[0]; return buf[0];
#endif
} }
/* static */ /* static */
@@ -54,11 +59,16 @@ char wxUniChar::To8bit(wxUniChar::value_type c)
if ( c < 0x80 ) if ( c < 0x80 )
return c; return c;
#if wxUSE_UTF8_LOCALE_ONLY
wxFAIL_MSG( _T("character cannot be converted to single UTF-8 byte") );
return '?'; // FIXME-UTF8: what to use as failure character?
#else
wchar_t in = c; wchar_t in = c;
char buf[2]; char buf[2];
if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 ) if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 )
return '?'; // FIXME-UTF8: what to use as failure character? return '?'; // FIXME-UTF8: what to use as failure character?
return buf[0]; return buf[0];
#endif
} }