Initial version of the UTF-8 string representation (still converting to wchar_t* a lot); for now it has to be explicitly enabled with --enable-utf8

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@45433 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2007-04-12 21:15:07 +00:00
parent 5b077ec744
commit 817270659e
18 changed files with 1286 additions and 257 deletions
--- a/336
+++ b/336
@@ -1639,6 +1639,7 @@ Optional Features:
  --enable-optimise       create optimised code
  --enable-debug          same as debug_flag and debug_info
  --enable-stl            use STL for containers
+  --enable-extended_rtti  use extended RTTI (XTI)
  --enable-omf            use OMF object format
  --enable-debug_flag     set __WXDEBUG__ flag (recommended for developers!)
  --enable-debug_info     create code with debugging information
@@ -1688,6 +1689,7 @@ Optional Features:
  --enable-longlong       use wxLongLong class
  --enable-mimetype       use wxMimeTypesManager
  --enable-mslu           use MS Layer for Unicode on Windows 9x (Win32 only)
+  --enable-utf8           use UTF-8 representation for strings (Unix only)
  --enable-snglinst       use wxSingleInstanceChecker class
  --enable-std_iostreams  use standard C++ stream classes
  --enable-std_string     use standard C++ string classes
@@ -2900,6 +2902,7 @@ DEBUG_CONFIGURE=0
 if test $DEBUG_CONFIGURE = 1; then
  DEFAULT_wxUSE_UNIVERSAL=no
  DEFAULT_wxUSE_STL=no
+  DEFAULT_wxUSE_EXTENDED_RTTI=no

  DEFAULT_wxUSE_NANOX=no

@@ -3106,6 +3109,7 @@ if test $DEBUG_CONFIGURE = 1; then

  DEFAULT_wxUSE_UNICODE=no
  DEFAULT_wxUSE_UNICODE_MSLU=no
+  DEFAULT_wxUSE_UNICODE_UTF8=no
  DEFAULT_wxUSE_WCSRTOMBS=no

  DEFAULT_wxUSE_PALETTE=no
@@ -3125,6 +3129,7 @@ if test $DEBUG_CONFIGURE = 1; then
 else
  DEFAULT_wxUSE_UNIVERSAL=no
  DEFAULT_wxUSE_STL=no
+  DEFAULT_wxUSE_EXTENDED_RTTI=no

  DEFAULT_wxUSE_NANOX=no

@@ -3330,6 +3335,7 @@ else

  DEFAULT_wxUSE_UNICODE=no
  DEFAULT_wxUSE_UNICODE_MSLU=yes
+  DEFAULT_wxUSE_UNICODE_UTF8=no
  DEFAULT_wxUSE_WCSRTOMBS=no

  DEFAULT_wxUSE_PALETTE=yes
@@ -4675,6 +4681,47 @@ echo "${ECHO_T}yes" >&6; }
 echo "${ECHO_T}no" >&6; }
          fi

+
+          enablestring=
+          { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-extended_rtti" >&5
+echo $ECHO_N "checking for --${enablestring:-enable}-extended_rtti... $ECHO_C" >&6; }
+          no_cache=0
+          # Check whether --enable-extended_rtti was given.
+if test "${enable_extended_rtti+set}" = set; then
+  enableval=$enable_extended_rtti;
+                          if test "$enableval" = yes; then
+                            ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI=yes'
+                          else
+                            ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI=no'
+                          fi
+
+else
+
+                          LINE=`grep "wxUSE_EXTENDED_RTTI" ${wx_arg_cache_file}`
+                          if test "x$LINE" != x ; then
+                            eval "DEFAULT_$LINE"
+                          else
+                            no_cache=1
+                          fi
+
+                          ac_cv_use_extended_rtti='wxUSE_EXTENDED_RTTI='$DEFAULT_wxUSE_EXTENDED_RTTI
+
+fi
+
+
+          eval "$ac_cv_use_extended_rtti"
+          if test "$no_cache" != 1; then
+            echo $ac_cv_use_extended_rtti >> ${wx_arg_cache_file}.tmp
+          fi
+
+          if test "$wxUSE_EXTENDED_RTTI" = yes; then
+            { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+          else
+            { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+          fi
+
 if test "$USE_OS2" = "1"; then
    DEFAULT_wxUSE_OMF=no

@@ -6698,6 +6745,47 @@ echo "${ECHO_T}no" >&6; }
          fi


+          enablestring=
+          { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-utf8" >&5
+echo $ECHO_N "checking for --${enablestring:-enable}-utf8... $ECHO_C" >&6; }
+          no_cache=0
+          # Check whether --enable-utf8 was given.
+if test "${enable_utf8+set}" = set; then
+  enableval=$enable_utf8;
+                          if test "$enableval" = yes; then
+                            ac_cv_use_utf8='wxUSE_UNICODE_UTF8=yes'
+                          else
+                            ac_cv_use_utf8='wxUSE_UNICODE_UTF8=no'
+                          fi
+
+else
+
+                          LINE=`grep "wxUSE_UNICODE_UTF8" ${wx_arg_cache_file}`
+                          if test "x$LINE" != x ; then
+                            eval "DEFAULT_$LINE"
+                          else
+                            no_cache=1
+                          fi
+
+                          ac_cv_use_utf8='wxUSE_UNICODE_UTF8='$DEFAULT_wxUSE_UNICODE_UTF8
+
+fi
+
+
+          eval "$ac_cv_use_utf8"
+          if test "$no_cache" != 1; then
+            echo $ac_cv_use_utf8 >> ${wx_arg_cache_file}.tmp
+          fi
+
+          if test "$wxUSE_UNICODE_UTF8" = yes; then
+            { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+          else
+            { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+          fi
+
+
          enablestring=
          { echo "$as_me:$LINENO: checking for --${enablestring:-enable}-snglinst" >&5
 echo $ECHO_N "checking for --${enablestring:-enable}-snglinst... $ECHO_C" >&6; }
@@ -22380,13 +22468,11 @@ _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-#include <sys/types.h> /* for off_t */
 #include <stdio.h>
 int
 main ()
 {
-int (*fp) (FILE *, off_t, int) = fseeko;
-     return fseeko (stdin, 0, 0) && fp (stdin, 0, 0);
+return fseeko (stdin, 0, 0) && (fseeko) (stdin, 0, 0);
  ;
  return 0;
 }
@@ -22426,13 +22512,11 @@ cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
 #define _LARGEFILE_SOURCE 1
-#include <sys/types.h> /* for off_t */
 #include <stdio.h>
 int
 main ()
 {
-int (*fp) (FILE *, off_t, int) = fseeko;
-     return fseeko (stdin, 0, 0) && fp (stdin, 0, 0);
+return fseeko (stdin, 0, 0) && (fseeko) (stdin, 0, 0);
  ;
  return 0;
 }
@@ -39268,7 +39352,6 @@ echo $ECHO_N "checking how many arguments gethostbyname_r() takes... $ECHO_C" >&
 else


-################################################################

 ac_cv_func_which_gethostbyname_r=unknown

@@ -39498,7 +39581,6 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext

 fi

-################################################################


 fi
@@ -39647,18 +39729,141 @@ _ACEOF
 fi

      fi
-
-{ echo "$as_me:$LINENO: checking how many arguments getservbyname_r() takes" >&5
-echo $ECHO_N "checking how many arguments getservbyname_r() takes... $ECHO_C" >&6; }
+                              { echo "$as_me:$LINENO: checking for getservbyname_r" >&5
+echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6; }
 if test "${ac_cv_func_which_getservbyname_r+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
 else

-                ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ echo "$as_me:$LINENO: checking for getservbyname_r" >&5
+echo $ECHO_N "checking for getservbyname_r... $ECHO_C" >&6; }
+if test "${ac_cv_func_getservbyname_r+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define getservbyname_r to an innocuous variant, in case <limits.h> declares getservbyname_r.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define getservbyname_r innocuous_getservbyname_r
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char getservbyname_r (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef getservbyname_r
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char getservbyname_r ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_getservbyname_r || defined __stub___getservbyname_r
+choke me
+#endif
+
+int
+main ()
+{
+return getservbyname_r ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext &&
+       $as_test_x conftest$ac_exeext; then
+  ac_cv_func_getservbyname_r=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_func_getservbyname_r=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_getservbyname_r" >&5
+echo "${ECHO_T}$ac_cv_func_getservbyname_r" >&6; }
+if test $ac_cv_func_getservbyname_r = yes; then
+
+        cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+#               include <netdb.h>
+
+int
+main ()
+{
+
+
+        char *name;
+        char *proto;
+        struct servent *se;
+        struct servent_data data;
+        (void) getservbyname_r(name, proto, se, &data);
+
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_func_which_getservbyname_r=four
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5


  cat >conftest.$ac_ext <<_ACEOF
@@ -39667,7 +39872,9 @@ _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
+
 #   include <netdb.h>
+
 int
 main ()
 {
@@ -39697,7 +39904,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && {
-	 test -z "$ac_cxx_werror_flag" ||
+	 test -z "$ac_c_werror_flag" ||
 	 test ! -s conftest.err
       } && test -s conftest.$ac_objext; then
  ac_cv_func_which_getservbyname_r=six
@@ -39712,7 +39919,9 @@ _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
+
 #   include <netdb.h>
+
 int
 main ()
 {
@@ -39742,7 +39951,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && {
-	 test -z "$ac_cxx_werror_flag" ||
+	 test -z "$ac_c_werror_flag" ||
 	 test ! -s conftest.err
       } && test -s conftest.$ac_objext; then
  ac_cv_func_which_getservbyname_r=five
@@ -39750,51 +39959,13 @@ else
  echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5

-
-                        cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-#include <netdb.h>
-int
-main ()
-{
-
-                                char *name;
-                                char *proto;
-                                struct servent *se;
-                                struct servent_data data;
-                                (void) getservbyname_r(name, proto, se, &data);
-
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } && {
-	 test -z "$ac_cxx_werror_flag" ||
-	 test ! -s conftest.err
-       } && test -s conftest.$ac_objext; then
-  ac_cv_func_which_getservbyname_r=four
-else
-  echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
 	ac_cv_func_which_getservbyname_r=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+

 fi

@@ -39805,18 +39976,10 @@ fi

 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext

-
+else
+  ac_cv_func_which_getservbyname_r=no
 fi

-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-        ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-
 fi
 { echo "$as_me:$LINENO: result: $ac_cv_func_which_getservbyname_r" >&5
 echo "${ECHO_T}$ac_cv_func_which_getservbyname_r" >&6; }
@@ -39836,6 +39999,7 @@ elif test $ac_cv_func_which_getservbyname_r = four; then
 #define HAVE_FUNC_GETSERVBYNAME_R_4 1
 _ACEOF

+
 fi


@@ -40715,6 +40879,13 @@ _ACEOF

 fi

+if test "$wxUSE_EXTENDED_RTTI" = "yes"; then
+  cat >>confdefs.h <<\_ACEOF
+#define wxUSE_EXTENDED_RTTI 1
+_ACEOF
+
+fi
+
 if test "$wxUSE_APPLE_IEEE" = "yes"; then
  cat >>confdefs.h <<\_ACEOF
 #define wxUSE_APPLE_IEEE 1
@@ -43476,6 +43647,13 @@ fi
    fi
 fi

+if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then
+    cat >>confdefs.h <<\_ACEOF
+#define wxUSE_UNICODE_UTF8 1
+_ACEOF
+
+fi
+
 if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then
  cat >>confdefs.h <<\_ACEOF
 #define wxUSE_EXPERIMENTAL_PRINTF 1
@@ -46113,6 +46291,9 @@ echo "${ECHO_T}$bakefile_cv_prog_makeisgnu" >&6; }
                PLATFORM_BEOS=1
            ;;
            * )
+                { { echo "$as_me:$LINENO: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&5
+echo "$as_me: error: Unknown platform: $BAKEFILE_FORCE_PLATFORM" >&2;}
+   { (exit 1); exit 1; }; }
            ;;
        esac
    fi
@@ -48857,10 +49038,21 @@ echo "${ECHO_T}no" >&6; }

    cppunit_major_min=`echo $cppunit_version_min | \
           sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\1/'`
+    if test "x${cppunit_major_min}" = "x" ; then
+       cppunit_major_min=0
+    fi
+
    cppunit_minor_min=`echo $cppunit_version_min | \
           sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\2/'`
+    if test "x${cppunit_minor_min}" = "x" ; then
+       cppunit_minor_min=0
+    fi
+
    cppunit_micro_min=`echo $cppunit_version_min | \
           sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\3/'`
+    if test "x${cppunit_micro_min}" = "x" ; then
+       cppunit_micro_min=0
+    fi

    cppunit_version_proper=`expr \
        $cppunit_major_version \> $cppunit_major_min \| \
--- a/configure.in
+++ b/configure.in
@@ -576,6 +576,7 @@ if test $DEBUG_CONFIGURE = 1; then

  DEFAULT_wxUSE_UNICODE=no
  DEFAULT_wxUSE_UNICODE_MSLU=no
+  DEFAULT_wxUSE_UNICODE_UTF8=no
  DEFAULT_wxUSE_WCSRTOMBS=no

  DEFAULT_wxUSE_PALETTE=no
@@ -801,6 +802,7 @@ else

  DEFAULT_wxUSE_UNICODE=no
  DEFAULT_wxUSE_UNICODE_MSLU=yes
+  DEFAULT_wxUSE_UNICODE_UTF8=no
  DEFAULT_wxUSE_WCSRTOMBS=no

  DEFAULT_wxUSE_PALETTE=yes
@@ -987,6 +989,8 @@ WX_ARG_ENABLE(log,           [  --enable-log            use logging system], wxU
 WX_ARG_ENABLE(longlong,      [  --enable-longlong       use wxLongLong class], wxUSE_LONGLONG)
 WX_ARG_ENABLE(mimetype,      [  --enable-mimetype       use wxMimeTypesManager], wxUSE_MIMETYPE)
 WX_ARG_ENABLE(mslu,          [  --enable-mslu           use MS Layer for Unicode on Windows 9x (Win32 only)], wxUSE_UNICODE_MSLU)
+dnl FIXME-UTF8: make UTF8 automatic
+WX_ARG_ENABLE(utf8,          [  --enable-utf8           use UTF-8 representation for strings (Unix only)], wxUSE_UNICODE_UTF8)
 WX_ARG_ENABLE(snglinst,      [  --enable-snglinst       use wxSingleInstanceChecker class], wxUSE_SNGLINST_CHECKER)
 WX_ARG_ENABLE(std_iostreams, [  --enable-std_iostreams  use standard C++ stream classes], wxUSE_STD_IOSTREAM)
 WX_ARG_ENABLE(std_string,    [  --enable-std_string     use standard C++ string classes], wxUSE_STD_STRING)
@@ -6483,6 +6487,10 @@ if test "$wxUSE_UNICODE" = "yes" ; then
    fi
 fi

+if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "yes"; then
+    AC_DEFINE(wxUSE_UNICODE_UTF8)
+fi
+
 if test "$wxUSE_wxUSE_EXPERIMENTAL_PRINTF" = "yes"; then
  AC_DEFINE(wxUSE_EXPERIMENTAL_PRINTF)
 fi
--- a/include/wx/buffer.h
+++ b/include/wx/buffer.h
@@ -168,8 +168,13 @@ typedef wxWritableCharTypeBuffer<wchar_t> wxWritableWCharBuffer;

    #define wxMB2WXbuf wxWCharBuffer
    #define wxWX2MBbuf wxCharBuffer
+    #if wxUSE_UNICODE_WCHAR
        #define wxWC2WXbuf wxChar*
        #define wxWX2WCbuf wxChar*
+    #elif wxUSE_UNICODE_UTF8
+        #define wxWC2WXbuf wxWCharBuffer
+        #define wxWX2WCbuf wxWCharBuffer
+    #endif
 #else // ANSI
    #define wxWxCharBuffer wxCharBuffer

--- a/include/wx/chartype.h
+++ b/include/wx/chartype.h
@@ -190,9 +190,15 @@
 /* depending on the platform, Unicode build can either store wxStrings as
   wchar_t* or UTF-8 encoded char*: */
 #if wxUSE_UNICODE
-    /* for now, all Unicode builds are wchar_t* based: */
-    #define wxUSE_UNICODE_WCHAR 1
+    #ifndef wxUSE_UNICODE_UTF8
        #define wxUSE_UNICODE_UTF8 0
+    #endif
+
+    #if wxUSE_UNICODE_UTF8
+        #define wxUSE_UNICODE_WCHAR 0
+    #else
+        #define wxUSE_UNICODE_WCHAR 1
+    #endif
 #else
    #define wxUSE_UNICODE_WCHAR 0
    #define wxUSE_UNICODE_UTF8  0
--- a/include/wx/list.h
+++ b/include/wx/list.h
@@ -380,7 +380,7 @@ private:
 union wxListKeyValue
 {
    long integer;
-    wxChar *string;
+    wxString *string;
 };

 // a struct which may contain both types of keys
@@ -397,15 +397,13 @@ public:
        { }
    wxListKey(long i) : m_keyType(wxKEY_INTEGER)
        { m_key.integer = i; }
-    wxListKey(const wxChar *s) : m_keyType(wxKEY_STRING)
-        { m_key.string = wxStrdup(s); }
    wxListKey(const wxString& s) : m_keyType(wxKEY_STRING)
-        { m_key.string = wxStrdup(s.c_str()); }
+        { m_key.string = new wxString(s); }

    // accessors
    wxKeyType GetKeyType() const { return m_keyType; }
-    const wxChar *GetString() const
-        { wxASSERT( m_keyType == wxKEY_STRING ); return m_key.string; }
+    const wxString GetString() const
+        { wxASSERT( m_keyType == wxKEY_STRING ); return *m_key.string; }
    long GetNumber() const
        { wxASSERT( m_keyType == wxKEY_INTEGER ); return m_key.integer; }

@@ -418,7 +416,7 @@ public:
    ~wxListKey()
    {
        if ( m_keyType == wxKEY_STRING )
-            free(m_key.string);
+            delete m_key.string;
    }

 private:
@@ -448,11 +446,11 @@ public:
    virtual ~wxNodeBase();

    // FIXME no check is done that the list is really keyed on strings
-    const wxChar *GetKeyString() const { return m_key.string; }
+    wxString GetKeyString() const { return *m_key.string; }
    long GetKeyInteger() const { return m_key.integer; }

    // Necessary for some existing code
-    void SetKeyString(wxChar* s) { m_key.string = s; }
+    void SetKeyString(const wxString& s) { m_key.string = new wxString(s); }
    void SetKeyInteger(long i) { m_key.integer = i; }

 #ifdef wxLIST_COMPATIBILITY
@@ -602,7 +600,7 @@ protected:

        // keyed append
    wxNodeBase *Append(long key, void *object);
-    wxNodeBase *Append(const wxChar *key, void *object);
+    wxNodeBase *Append(const wxString& key, void *object);

        // removes node from the list but doesn't delete it (returns pointer
        // to the node or NULL if it wasn't found in the list)
--- a/include/wx/log.h
+++ b/include/wx/log.h
@@ -476,14 +476,14 @@ WXDLLIMPEXP_BASE const wxChar* wxSysErrorMsg(unsigned long nErrCode = 0);
    WX_DEFINE_VARARG_FUNC_VOID(wxLog##level, wxDoLog##level)

 #define DECLARE_LOG_FUNCTION_IMPL(level)                                    \
-    extern void WXDLLIMPEXP_BASE wxVLog##level(const wxChar *szFormat,      \
+    extern void WXDLLIMPEXP_BASE wxVLog##level(const wxString& format,      \
                                               va_list argptr);             \
-    extern void WXDLLIMPEXP_BASE wxDoLog##level(const wxChar *szFormat,     \
-                                                ...) ATTRIBUTE_PRINTF_1
+    extern void WXDLLIMPEXP_BASE                                            \
+    wxDoLog##level(const wxChar *szFormat, ...) ATTRIBUTE_PRINTF_1

 #define DECLARE_LOG_FUNCTION2_EXP_IMPL(level, argclass, arg, expdecl)       \
    extern void expdecl wxVLog##level(argclass arg,                         \
-                                      const wxChar *szFormat,               \
+                                      const wxString& format,               \
                                      va_list argptr);                      \
    extern void expdecl wxDoLog##level(argclass arg,                        \
                                       const wxChar *szFormat,              \
@@ -497,12 +497,12 @@ WXDLLIMPEXP_BASE const wxChar* wxSysErrorMsg(unsigned long nErrCode = 0);
    WX_DEFINE_VARARG_FUNC_NOP(wxLog##level)

 #define DECLARE_LOG_FUNCTION_IMPL(level)                                    \
-    inline void wxVLog##level(const wxChar *WXUNUSED(szFormat),             \
+    inline void wxVLog##level(const wxString& WXUNUSED(format),             \
                              va_list WXUNUSED(argptr)) { }                 \

 #define DECLARE_LOG_FUNCTION2_EXP_IMPL(level, argclass, arg, expdecl)       \
    inline void wxVLog##level(argclass WXUNUSED(arg),                       \
-                              const wxChar *WXUNUSED(szFormat),             \
+                              const wxString& WXUNUSED(format),             \
                              va_list WXUNUSED(argptr)) {}

 // Empty Class to fake wxLogNull
@@ -590,7 +590,7 @@ DECLARE_LOG_FUNCTION_PUBLIC(SysError)

    // this version only logs the message if the mask had been added to the
    // list of masks with AddTraceMask()
-    DECLARE_LOG_FUNCTION2_IMPL(Trace, const wxChar*, mask);
+    DECLARE_LOG_FUNCTION2_IMPL(Trace, const wxString&, mask);
    // and this one does nothing if all of level bits are not set in
    // wxLog::GetActive()->GetTraceMask() -- it's deprecated in favour of
    // string identifiers
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -371,13 +371,14 @@ private:
      T data;
      size_t len;

-      SubstrBufFromType() {}
      SubstrBufFromType(const T& data_, size_t len_)
          : data(data_), len(len_) {}
  };

 #if wxUSE_UNICODE_UTF8
-  // FIXME-UTF8: this will have to use slightly different type
+  // even char* -> char* needs conversion, from locale charset to UTF-8
+  typedef SubstrBufFromType<wxCharBuffer>    SubstrBufFromWC;
+  typedef SubstrBufFromType<wxCharBuffer>    SubstrBufFromMB;
 #elif wxUSE_UNICODE_WCHAR
  typedef SubstrBufFromType<const wchar_t*>  SubstrBufFromWC;
  typedef SubstrBufFromType<wxWCharBuffer>   SubstrBufFromMB;
@@ -392,8 +393,12 @@ private:
  // between UTF-8 and wchar_t* representations of the string are mostly
  // contained here.

-#if wxUSE_UNICODE
-  // FIXME-UTF8: This will need changes when UTF8 build is introduced
+#if wxUSE_UNICODE_UTF8
+  static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
+                                    const wxMBConv& conv);
+  static SubstrBufFromWC ConvertStr(const wchar_t *pwz, size_t nLength,
+                                    const wxMBConv& conv);
+#elif wxUSE_UNICODE_WCHAR
  static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
                                    const wxMBConv& conv);
 #else
@@ -447,6 +452,7 @@ private:
  // encodes the character to a form used to represent it in internal
  // representation (returns a string in UTF8 version)
  static wxChar EncodeChar(wxUniChar ch) { return (wxChar)ch; }
+  static wxUniChar DecodeChar(wxStringImpl::const_iterator i) { return *i; }

  // translates position index in wxString to/from index in underlying
  // wxStringImpl:
@@ -459,11 +465,56 @@ private:

 #else // wxUSE_UNICODE_UTF8

-  typedef char Utf8CharBuffer[5];
+  // checks correctness of UTF-8 sequence
+  static bool IsValidUtf8String(const char *c);
+#ifdef __WXDEBUG__
+  static bool IsValidUtf8LeadByte(unsigned char c);
+#endif
+
+  // table of offsets to skip forward when iterating
+  static unsigned char ms_utf8IterTable[256];
+
+  static void IncIter(wxStringImpl::iterator& i)
+  {
+      wxASSERT( IsValidUtf8LeadByte(*i) );
+      i += ms_utf8IterTable[(unsigned char)*i];
+  }
+  static void IncIter(wxStringImpl::const_iterator& i)
+  {
+      wxASSERT( IsValidUtf8LeadByte(*i) );
+      i += ms_utf8IterTable[(unsigned char)*i];
+  }
+
+  static void DecIter(wxStringImpl::iterator& i);
+  static void DecIter(wxStringImpl::const_iterator& i);
+  static wxStringImpl::iterator AddToIter(wxStringImpl::iterator i, int n);
+  static wxStringImpl::const_iterator AddToIter(wxStringImpl::const_iterator i, int n);
+  static int DiffIters(wxStringImpl::const_iterator i1, wxStringImpl::const_iterator i2);
+  static int DiffIters(wxStringImpl::iterator i1, wxStringImpl::iterator i2);
+
+  struct Utf8CharBuffer
+  {
+      char data[5];
+      operator const char*() const { return data; }
+  };
  static Utf8CharBuffer EncodeChar(wxUniChar ch);
  // returns n copies of ch encoded in UTF-8 string
  static wxCharBuffer EncodeNChars(size_t n, wxUniChar ch);

+  // returns the length of UTF-8 encoding of the character with lead byte 'c'
+  static size_t GetUtf8CharLength(char c)
+  {
+      wxASSERT( IsValidUtf8LeadByte(c) );
+      return ms_utf8IterTable[(unsigned char)c];
+  }
+
+  // decodes single UTF-8 character from UTF-8 string
+  // FIXME-UTF8: move EncodeChar/DecodeChar and other operations to separate
+  //             class
+  static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
+    { return wxUniCharRef::DecodeChar(i); }
+  friend class WXDLLIMPEXP_BASE wxUniCharRef;
+
  size_t PosToImpl(size_t pos) const
  {
      if ( pos == 0 || pos == npos )
@@ -472,6 +523,15 @@ private:
          return wxStringImpl::const_iterator(begin() + pos) - m_impl.begin();
  }

+  void PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const;
+
+  size_t LenToImpl(size_t len) const
+  {
+      size_t pos, len2;
+      PosLenToImpl(0, len, &pos, &len2);
+      return len2;
+  }
+
  size_t PosFromImpl(size_t pos) const
  {
      if ( pos == 0 || pos == npos )
@@ -480,13 +540,23 @@ private:
          return const_iterator(m_impl.begin() + pos) - begin();
  }

-  // FIXME: return as-is without copying under UTF8 locale, return
+  size_t IterToImplPos(wxStringImpl::iterator i) const
+    { return wxStringImpl::const_iterator(i) - m_impl.begin(); }
+
+  // FIXME-UTF8: return as-is without copying under UTF8 locale, return
  //             converted string under other locales - needs wxCharBuffer
  //             changes
-  static wxCharBuffer ImplStr(const char* str);
+  static wxCharBuffer ImplStr(const char* str,
+                              const wxMBConv& conv = wxConvLibc)
+    { return ConvertStr(str, npos, conv).data; }
+  static SubstrBufFromMB ImplStr(const char* str, size_t n,
+                                 const wxMBConv& conv = wxConvLibc)
+    { return ConvertStr(str, n, conv); }

  static wxCharBuffer ImplStr(const wchar_t* str)
-      { return wxConvUTF8.cWC2MB(str); }
+    { return ConvertStr(str, npos, wxConvUTF8).data; }
+  static SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
+    { return ConvertStr(str, n, wxConvUTF8); }
 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8


@@ -496,7 +566,9 @@ public:
  wxString() {}

    // copy ctor
+  // FIXME-UTF8: this one needs to do UTF-8 conversion in UTF-8 build!
  wxString(const wxStringImpl& stringSrc) : m_impl(stringSrc) { }
+
  wxString(const wxString& stringSrc) : m_impl(stringSrc.m_impl) { }

    // string containing nRepeat copies of ch
@@ -571,6 +643,18 @@ public:
  wxString(const wxString& str, size_t nLength)
      : m_impl(str.Mid(0, nLength).m_impl) {}

+  // even if we're not built with wxUSE_STL == 1 it is very convenient to allow
+  // implicit conversions from std::string to wxString as this allows using
+  // the same strings in non-GUI and GUI code; however, we don't want to
+  // unconditionally add this ctor as it would make wx lib dependent on
+  // libstdc++ on some Linux versions which is bad, so instead we ask the
+  // client code to define this wxUSE_STD_STRING symbol if they need it
+#if wxUSE_STD_STRING && !wxUSE_STL_BASED_WXSTRING
+  wxString(const wxStdString& s)
+      // FIXME-UTF8: this one needs to do UTF-8 conversion in UTF-8 build!
+      : m_impl(s.c_str()) { } // FIXME-UTF8: this is broken for embedded 0s
+#endif // wxUSE_STD_STRING && !wxUSE_STL_BASED_WXSTRING
+
 public:
  // standard types
  typedef wxUniChar value_type;
@@ -583,7 +667,12 @@ public:
  typedef wxUniChar const_reference;

 #if wxUSE_STL
+  #if wxUSE_UNICODE_UTF8
+    // random access is not O(1), which Random Access Iterator would require
+    #define WX_STR_ITERATOR_TAG std::bidirectional_iterator_tag
+  #else
    #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag
+  #endif
 #else
  #define WX_STR_ITERATOR_TAG void /* dummy type */
 #endif
@@ -599,8 +688,6 @@ public:
          typedef reference_type reference;                                 \
          typedef pointer_type pointer;                                     \
                                                                            \
-          iterator_name(const iterator_name& i) : m_cur(i.m_cur) {}         \
-                                                                            \
          reference operator*() const { return reference_ctor; }            \
          reference operator[](size_t n) const { return *(*this + n); }     \
                                                                            \
@@ -621,14 +708,6 @@ public:
              return tmp;                                                   \
          }                                                                 \
                                                                            \
-          iterator_name operator+(int n) const                              \
-            { return iterator_name(wxString::AddToIter(m_cur, n)); }        \
-          iterator_name operator+(size_t n) const                           \
-            { return iterator_name(wxString::AddToIter(m_cur, (int)n)); }   \
-          iterator_name operator-(int n) const                              \
-            { return iterator_name(wxString::AddToIter(m_cur, -n)); }       \
-          iterator_name operator-(size_t n) const                           \
-            { return iterator_name(wxString::AddToIter(m_cur, -(int)n)); }  \
          iterator_name& operator+=(int n)                                  \
            { m_cur = wxString::AddToIter(m_cur, n); return *this; }        \
          iterator_name& operator+=(size_t n)                               \
@@ -657,7 +736,6 @@ public:
                                                                            \
      private:                                                              \
          /* for internal wxString use only: */                             \
-          iterator_name(underlying_iterator ptr) : m_cur(ptr) {}            \
          operator underlying_iterator() const { return m_cur; }            \
                                                                            \
          friend class WXDLLIMPEXP_BASE wxString;                           \
@@ -668,23 +746,90 @@ public:

  class const_iterator;

+#if wxUSE_UNICODE_UTF8
+  class iterator
+  {
+      // NB: In UTF-8 build, (non-const) iterator needs to keep reference
+      //     to the underlying wxStringImpl, because UTF-8 is variable-length
+      //     encoding and changing the value pointed to by an iterator using
+      //     its operator* requires calling wxStringImpl::replace() if the old
+      //     and new values differ in their encoding's length.
+
+      WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef,
+                           wxUniCharRef::CreateForString(m_str, m_cur));
+
+  public:
+      iterator(const iterator& i) : m_cur(i.m_cur), m_str(i.m_str) {}
+
+      iterator operator+(int n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, n)); }
+      iterator operator+(size_t n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, (int)n)); }
+      iterator operator-(int n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, -n)); }
+      iterator operator-(size_t n) const
+        { return iterator(m_str, wxString::AddToIter(m_cur, -(int)n)); }
+
+  private:
+      iterator(wxString *str, underlying_iterator ptr)
+          : m_cur(ptr), m_str(str->m_impl) {}
+      iterator(wxStringImpl& str, underlying_iterator ptr)
+          : m_cur(ptr), m_str(str) {}
+
+      wxStringImpl& m_str;
+
+      friend class const_iterator;
+  };
+#else // !wxUSE_UNICODE_UTF8
  class iterator
  {
      WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef,
                           wxUniCharRef::CreateForString(m_cur));

+  public:
+      iterator(const iterator& i) : m_cur(i.m_cur) {}
+
+      iterator operator+(int n) const
+        { return iterator(wxString::AddToIter(m_cur, n)); }
+      iterator operator+(size_t n) const
+        { return iterator(wxString::AddToIter(m_cur, (int)n)); }
+      iterator operator-(int n) const
+        { return iterator(wxString::AddToIter(m_cur, -n)); }
+      iterator operator-(size_t n) const
+        { return iterator(wxString::AddToIter(m_cur, -(int)n)); }
+
+  private:
+      // for internal wxString use only:
+      iterator(underlying_iterator ptr) : m_cur(ptr) {}
+      iterator(wxString *WXUNUSED(str), underlying_iterator ptr) : m_cur(ptr) {}
+
      friend class const_iterator;
  };
+#endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8

  class const_iterator
  {
      // NB: reference_type is intentionally value, not reference, the character
      //     may be encoded differently in wxString data:
      WX_STR_ITERATOR_IMPL(const_iterator, const wxChar*, wxUniChar,
-                           wxUniChar(*m_cur));
+                           wxString::DecodeChar(m_cur));

  public:
+      const_iterator(const const_iterator& i) : m_cur(i.m_cur) {}
      const_iterator(const iterator& i) : m_cur(i.m_cur) {}
+
+      const_iterator operator+(int n) const
+        { return const_iterator(wxString::AddToIter(m_cur, n)); }
+      const_iterator operator+(size_t n) const
+        { return const_iterator(wxString::AddToIter(m_cur, (int)n)); }
+      const_iterator operator-(int n) const
+        { return const_iterator(wxString::AddToIter(m_cur, -n)); }
+      const_iterator operator-(size_t n) const
+        { return const_iterator(wxString::AddToIter(m_cur, -(int)n)); }
+
+  private:
+      // for internal wxString use only:
+      const_iterator(underlying_iterator ptr) : m_cur(ptr) {}
  };

  #undef WX_STR_ITERATOR_TAG
@@ -767,10 +912,10 @@ public:

  // first valid index position
  const_iterator begin() const { return const_iterator(m_impl.begin()); }
-  iterator begin() { return iterator(m_impl.begin()); }
+  iterator begin() { return iterator(this, m_impl.begin()); }
  // position one after the last valid one
  const_iterator end() const { return const_iterator(m_impl.end()); }
-  iterator end() { return iterator(m_impl.end()); }
+  iterator end() { return iterator(this, m_impl.end()); }

  // first element of the reversed string
  const_reverse_iterator rbegin() const
@@ -925,7 +1070,7 @@ public:

    // explicit conversion to C string in internal representation (char*,
    // wchar_t*, UTF-8-encoded char*, depending on the build):
-    const_pointer wx_str() const { return m_impl.c_str(); }
+    const wxStringCharType *wx_str() const { return m_impl.c_str(); }

    // conversion to *non-const* multibyte or widestring buffer; modifying
    // returned buffer won't affect the string, these methods are only useful
@@ -963,21 +1108,26 @@ public:

    const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); }

-    const wxChar* wc_str() const { return c_str(); }
-
+#if wxUSE_UNICODE_WCHAR
+    const wxChar* wc_str() const { return wx_str(); }
+#elif wxUSE_UNICODE_UTF8
+    const wxWCharBuffer wc_str() const;
+#endif
    // for compatibility with !wxUSE_UNICODE version
-    const wxChar* wc_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxWX2WCbuf wc_str(const wxMBConv& WXUNUSED(conv)) const
+      { return wc_str(); }

 #if wxMBFILES
    const wxCharBuffer fn_str() const { return mb_str(wxConvFile); }
 #else // !wxMBFILES
-    const wxChar* fn_str() const { return c_str(); }
+    const wxWX2WCbuf fn_str() const { return wc_str(); }
 #endif // wxMBFILES/!wxMBFILES
+
 #else // ANSI
-    const wxChar* mb_str() const { return c_str(); }
+    const wxChar* mb_str() const { return wx_str(); }

    // for compatibility with wxUSE_UNICODE version
-    const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); }

    const wxWX2MBbuf mbc_str() const { return mb_str(); }

@@ -1588,14 +1738,25 @@ public:
  {
 #if wxUSE_UNICODE_UTF8
    if ( !ch.IsAscii() )
-        m_impl.insert(begin() + nPos, EncodeNChars(n, ch));
+        m_impl.insert(PosToImpl(nPos), EncodeNChars(n, ch));
    else
 #endif
-        m_impl.insert(begin() + nPos, n, (wxStringCharType)ch);
+        m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch);
    return *this;
  }
  iterator insert(iterator it, wxUniChar ch)
-    { return iterator(m_impl.insert(it, EncodeChar(ch))); }
+  {
+#if wxUSE_UNICODE_UTF8
+    if ( !ch.IsAscii() )
+    {
+        size_t pos = IterToImplPos(it);
+        m_impl.insert(pos, EncodeChar(ch));
+        return iterator(this, m_impl.begin() + pos);
+    }
+    else
+#endif
+        return iterator(this, m_impl.insert(it, (wxStringCharType)ch));
+  }
  void insert(iterator it, const_iterator first, const_iterator last)
    { m_impl.insert(it, first, last); }
  void insert(iterator it, const char *first, const char *last)
@@ -1606,7 +1767,7 @@ public:
  {
 #if wxUSE_UNICODE_UTF8
    if ( !ch.IsAscii() )
-        m_impl.insert(it, EncodeNChars(n, ch));
+        m_impl.insert(IterToImplPos(it), EncodeNChars(n, ch));
    else
 #endif
        m_impl.insert(it, n, (wxStringCharType)ch);
@@ -1622,9 +1783,9 @@ public:
  }
    // delete characters from first up to last
  iterator erase(iterator first, iterator last)
-    { return iterator(m_impl.erase(first, last)); }
+    { return iterator(this, m_impl.erase(first, last)); }
  iterator erase(iterator first)
-    { return iterator(m_impl.erase(first)); }
+    { return iterator(this, m_impl.erase(first)); }

 #ifdef wxSTRING_BASE_HASNT_CLEAR
  void clear() { erase(); }
@@ -1874,7 +2035,11 @@ public:

    // as strpbrk() but starts at nStart, returns npos if not found
  size_t find_first_of(const wxString& str, size_t nStart = 0) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_first_of(str.mb_str().data(), nStart); }
+#else
    { return find_first_of((const wxChar*)str.c_str(), nStart); }
+#endif
    // same as above
  size_t find_first_of(const char* sz, size_t nStart = 0) const;
  size_t find_first_of(const wchar_t* sz, size_t nStart = 0) const;
@@ -1885,7 +2050,11 @@ public:
    { return find(c, nStart); }
    // find the last (starting from nStart) char from str in this string
  size_t find_last_of (const wxString& str, size_t nStart = npos) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_last_of(str.mb_str().data(), nStart); }
+#else
    { return find_last_of((const wxChar*)str.c_str(), nStart); }
+#endif
    // same as above
  size_t find_last_of (const char* sz, size_t nStart = npos) const;
  size_t find_last_of (const wchar_t* sz, size_t nStart = npos) const;
@@ -1899,7 +2068,11 @@ public:

    // as strspn() (starting from nStart), returns npos on failure
  size_t find_first_not_of(const wxString& str, size_t nStart = 0) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_first_not_of(str.mb_str().data(), nStart); }
+#else
    { return find_first_not_of((const wxChar*)str.c_str(), nStart); }
+#endif
    // same as above
  size_t find_first_not_of(const char* sz, size_t nStart = 0) const;
  size_t find_first_not_of(const wchar_t* sz, size_t nStart = 0) const;
@@ -1909,7 +2082,11 @@ public:
  size_t find_first_not_of(wxUniChar ch, size_t nStart = 0) const;
    //  as strcspn()
  size_t find_last_not_of(const wxString& str, size_t nStart = npos) const
+#if wxUSE_UNICODE // FIXME-UTF8: temporary
+    { return find_last_not_of(str.mb_str().data(), nStart); }
+#else
    { return find_last_not_of((const wxChar*)str.c_str(), nStart); }
+#endif
    // same as above
  size_t find_last_not_of(const char* sz, size_t nStart = npos) const;
  size_t find_last_not_of(const wchar_t* sz, size_t nStart = npos) const;
--- a/include/wx/stringimpl.h
+++ b/include/wx/stringimpl.h
@@ -42,6 +42,10 @@

 // global pointer to empty string
 extern WXDLLIMPEXP_DATA_BASE(const wxChar*) wxEmptyString;
+#if wxUSE_UNICODE_UTF8
+// FIXME-UTF8: we should have only one wxEmptyString
+extern WXDLLIMPEXP_DATA_BASE(const wxStringCharType*) wxEmptyStringImpl;
+#endif


 // ----------------------------------------------------------------------------
@@ -61,7 +65,7 @@ extern WXDLLIMPEXP_DATA_BASE(const wxChar*) wxEmptyString;
    #ifdef HAVE_STD_WSTRING
        typedef std::wstring wxStdString;
    #else
-        typedef std::basic_string<wxChar> wxStdString;
+        typedef std::basic_string<wxStringCharType> wxStdString;
    #endif
 #else
    typedef std::string wxStdString;
@@ -97,8 +101,8 @@ struct WXDLLIMPEXP_BASE wxStringData
  size_t  nDataLength,  // actual string length
          nAllocLength; // allocated memory size

-  // mimics declaration 'wxChar data[nAllocLength]'
-  wxChar* data() const { return (wxChar*)(this + 1); }
+  // mimics declaration 'wxStringCharType data[nAllocLength]'
+  wxStringCharType* data() const { return (wxStringCharType*)(this + 1); }

  // empty string has a special ref count so it's never deleted
  bool  IsEmpty()   const { return (nRefs == -1); }
@@ -143,7 +147,11 @@ protected:
  // string (re)initialization functions
    // initializes the string to the empty value (must be called only from
    // ctors, use Reinit() otherwise)
+#if wxUSE_UNICODE_UTF8
+  void Init() { m_pchData = (wxStringCharType *)wxEmptyStringImpl; } // FIXME-UTF8
+#else
  void Init() { m_pchData = (wxStringCharType *)wxEmptyString; }
+#endif
    // initializes the string with (a part of) C-string
  void InitWith(const wxStringCharType *psz, size_t nPos = 0, size_t nLen = npos);
    // as Init, but also frees old data
@@ -378,7 +386,7 @@ public:
    { ConcatSelf(str.length(), str.c_str()); return *this; }
    // append first n (or all if n == npos) characters of sz
  wxStringImpl& append(const wxStringCharType *sz)
-    { ConcatSelf(wxStrlen(sz), sz); return *this; }
+    { ConcatSelf(Strsize(sz), sz); return *this; }
  wxStringImpl& append(const wxStringCharType *sz, size_t n)
    { ConcatSelf(n, sz); return *this; }
    // append n copies of ch
@@ -395,7 +403,7 @@ public:
    { clear(); return append(str, pos, n); }
    // same as `= first n (or all if n == npos) characters of sz'
  wxStringImpl& assign(const wxStringCharType *sz)
-    { clear(); return append(sz, wxStrlen(sz)); }
+    { clear(); return append(sz, Strsize(sz)); }
  wxStringImpl& assign(const wxStringCharType *sz, size_t n)
    { clear(); return append(sz, n); }
    // same as `= n copies of ch'
@@ -430,9 +438,9 @@ public:
    // insert first n (or all if n == npos) characters of sz
  wxStringImpl& insert(size_t nPos, const wxStringCharType *sz, size_t n = npos);
    // insert n copies of ch
-  wxStringImpl& insert(size_t nPos, size_t n, wxStringCharType ch)// FIXME-UTF8: tricky
+  wxStringImpl& insert(size_t nPos, size_t n, wxStringCharType ch)
    { return insert(nPos, wxStringImpl(n, ch)); }
-  iterator insert(iterator it, wxStringCharType ch) // FIXME-UTF8: tricky
+  iterator insert(iterator it, wxStringCharType ch)
    { size_t idx = it - begin(); insert(idx, 1, ch); return begin() + idx; }
  void insert(iterator it, const_iterator first, const_iterator last)
    { insert(it - begin(), first, last - first); }
@@ -525,6 +533,13 @@ public:
  void DoUngetWriteBuf(size_t nLen);
 #endif

+private:
+#if wxUSE_UNICODE_UTF8
+  static size_t Strsize(const wxStringCharType *s) { return strlen(s); }
+#else
+  static size_t Strsize(const wxStringCharType *s) { return wxStrlen(s); }
+#endif
+
  friend class WXDLLIMPEXP_BASE wxString;
 };

--- a/include/wx/strvararg.h
+++ b/include/wx/strvararg.h
@@ -93,7 +93,7 @@ template<>
 struct WXDLLIMPEXP_BASE wxArgNormalizer<const wxCStrData&>
 {
    wxArgNormalizer(const wxCStrData& value) : m_value(value) {}
-    const wxStringCharType *get() const;
+    const wxChar *get() const; // FIXME-UTF8: should be wxStringCharType

    const wxCStrData& m_value;
 };
@@ -109,7 +109,7 @@ template<>
 struct WXDLLIMPEXP_BASE wxArgNormalizer<const wxString&>
 {
    wxArgNormalizer(const wxString& value) : m_value(value) {}
-    const wxStringCharType *get() const;
+    const wxChar *get() const; // FIXME-UTF8: should be wxStringCharType

    const wxString& m_value;
 };
@@ -121,8 +121,7 @@ struct wxArgNormalizer<wxString> : public wxArgNormalizer<const wxString&>
        : wxArgNormalizer<const wxString&>(value) {}
 };

-#if wxUSE_UNICODE_WCHAR
-
+#if wxUSE_UNICODE // FIXME-UTF8: should be wxUSE_UNICODE_WCHAR
 template<>
 struct WXDLLIMPEXP_BASE wxArgNormalizer<const char*>
 {
@@ -139,9 +138,30 @@ struct wxArgNormalizer<char*> : public wxArgNormalizer<const char*>
    wxArgNormalizer(char *value)
        : wxArgNormalizer<const char*>(value) {}
 };
+#endif // wxUSE_UNICODE_WCHAR

-#elif wxUSE_WCHAR_T // !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T
+// FIXME-UTF8
+#if 0 // wxUSE_UNICODE_UTF8
+// for conversion from local charset to UTF-8
+template<>
+struct WXDLLIMPEXP_BASE wxArgNormalizer<const char*>
+{
+    wxArgNormalizer(const char *value);
+    ~wxArgNormalizer();
+    const char *get() const;

+    wxCharBuffer *m_value;
+};
+
+template<>
+struct wxArgNormalizer<char*> : public wxArgNormalizer<const char*>
+{
+    wxArgNormalizer(char *value)
+        : wxArgNormalizer<const char*>(value) {}
+};
+#endif // wxUSE_UNICODE_UTF8
+
+#if /*wxUSE_UNICODE_UTF8 || */ !wxUSE_UNICODE // FIXME-UTF8
 template<>
 struct WXDLLIMPEXP_BASE wxArgNormalizer<const wchar_t*>
 {
@@ -158,8 +178,7 @@ struct wxArgNormalizer<wchar_t*> : public wxArgNormalizer<const wchar_t*>
    wxArgNormalizer(wchar_t *value)
        : wxArgNormalizer<const wchar_t*>(value) {}
 };
-
-#endif // wxUSE_UNICODE_WCHAR / !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T
+#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE

 // versions for passing wx[W]CharBuffer:
 template<>
--- a/include/wx/unichar.h
+++ b/include/wx/unichar.h
@@ -69,6 +69,9 @@ public:
    // Returns Unicode code point value of the character
    value_type GetValue() const { return m_value; }

+    // Returns true if the character is an ASCII character:
+    bool IsAscii() const { return m_value < 0x80; }
+
    // Conversions to char and wchar_t types: all of those are needed to be
    // able to pass wxUniChars to verious standard narrow and wide character
    // functions
@@ -152,7 +155,11 @@ private:
    typedef wxStringImpl::iterator iterator;

    // create the reference
+#if wxUSE_UNICODE_UTF8
+    wxUniCharRef(wxStringImpl& str, iterator pos) : m_str(str), m_pos(pos) {}
+#else
    wxUniCharRef(iterator pos) : m_pos(pos) {}
+#endif

 public:
    // NB: we have to make this public, because we don't have wxString
@@ -160,23 +167,26 @@ public:
    //     as friend; so at least don't use a ctor but a static function
    //     that must be used explicitly (this is more than using 'explicit'
    //     keyword on ctor!):
+#if wxUSE_UNICODE_UTF8
+    static wxUniCharRef CreateForString(wxStringImpl& str, iterator pos)
+        { return wxUniCharRef(str, pos); }
+#else
    static wxUniCharRef CreateForString(iterator pos)
        { return wxUniCharRef(pos); }
+#endif

    wxUniChar::value_type GetValue() const { return UniChar().GetValue(); }
+    bool IsAscii() const { return UniChar().IsAscii(); } // value < 0x80

    // Assignment operators:
-    wxUniCharRef& operator=(const wxUniCharRef& c)
-    {
-        *m_pos = *c.m_pos;
-        return *this;
-    };
+#if wxUSE_UNICODE_UTF8
+    wxUniCharRef& operator=(const wxUniChar& c);
+#else
+    wxUniCharRef& operator=(const wxUniChar& c) { *m_pos = c; return *this; }
+#endif

-    wxUniCharRef& operator=(const wxUniChar& c)
-    {
-        *m_pos = c;
-        return *this;
-    };
+    wxUniCharRef& operator=(const wxUniCharRef& c)
+        { return *this = c.UniChar(); }

    wxUniCharRef& operator=(char c) { return *this = wxUniChar(c); }
    wxUniCharRef& operator=(wchar_t c) { return *this = wxUniChar(c); }
@@ -227,11 +237,28 @@ public:
 #endif

 private:
-    wxUniChar UniChar() const { return *m_pos; }
+    wxUniChar UniChar() const
+    {
+#if wxUSE_UNICODE_UTF8
+        return DecodeChar(m_pos);
+#else
+        return *m_pos;
+#endif
+    }
+
+#if wxUSE_UNICODE_UTF8
+    // FIXME-UTF8: move this to a separate 'string operations' class
+    static wxUniChar DecodeChar(wxStringImpl::const_iterator i);
+    friend class WXDLLIMPEXP_BASE wxString;
+#endif
+
    friend class WXDLLIMPEXP_BASE wxUniChar;

 private:
-    // pointer to the character in string
+    // reference to the string and pointer to the character in string
+#if wxUSE_UNICODE_UTF8
+    wxStringImpl& m_str;
+#endif
    iterator m_pos;
 };

--- a/setup.h.in
+++ b/setup.h.in
@@ -557,6 +557,8 @@

 #define wxUSE_UNICODE_MSLU 0

+#define wxUSE_UNICODE_UTF8 0
+
 #define wxUSE_DC_CACHEING 0

 #define wxUSE_GADGETS 0
--- a/src/common/list.cpp
+++ b/src/common/list.cpp
@@ -53,7 +53,7 @@ bool wxListKey::operator==(wxListKeyValue value) const
            // by not putting return here...

        case wxKEY_STRING:
-            return wxStrcmp(m_key.string, value.string) == 0;
+            return *m_key.string == *value.string;

        case wxKEY_INTEGER:
            return m_key.integer == value.integer;
@@ -84,7 +84,7 @@ wxNodeBase::wxNodeBase(wxListBase *list,

        case wxKEY_STRING:
            // to be free()d later
-            m_key.string = wxStrdup(key.GetString());
+            m_key.string = new wxString(key.GetString());
            break;

        default:
@@ -107,7 +107,7 @@ wxNodeBase::~wxNodeBase()
    {
        if ( m_list->m_keyType == wxKEY_STRING )
        {
-            free(m_key.string);
+            delete m_key.string;
        }

        m_list->DetachNode(this);
@@ -257,7 +257,7 @@ wxNodeBase *wxListBase::Append(long key, void *object)
    return AppendCommon(node);
 }

-wxNodeBase *wxListBase::Append (const wxChar *key, void *object)
+wxNodeBase *wxListBase::Append (const wxString& key, void *object)
 {
    wxCHECK_MSG( (m_keyType == wxKEY_STRING) ||
                 (m_keyType == wxKEY_NONE && m_count == 0),
--- a/src/common/log.cpp
+++ b/src/common/log.cpp
@@ -84,10 +84,10 @@
 // ----------------------------------------------------------------------------

 // generic log function
-void wxVLogGeneric(wxLogLevel level, const wxChar *szFormat, va_list argptr)
+void wxVLogGeneric(wxLogLevel level, const wxString& format, va_list argptr)
 {
    if ( wxLog::IsEnabled() ) {
-        wxLog::OnLog(level, wxString::FormatV(szFormat, argptr), time(NULL));
+        wxLog::OnLog(level, wxString::FormatV(format, argptr), time(NULL));
    }
 }

@@ -100,11 +100,11 @@ void wxDoLogGeneric(wxLogLevel level, const wxChar *szFormat, ...)
 }

 #define IMPLEMENT_LOG_FUNCTION(level)                               \
-  void wxVLog##level(const wxChar *szFormat, va_list argptr)        \
+  void wxVLog##level(const wxString& format, va_list argptr)        \
  {                                                                 \
    if ( wxLog::IsEnabled() ) {                                     \
      wxLog::OnLog(wxLOG_##level,                                   \
-                   wxString::FormatV(szFormat, argptr), time(NULL));\
+                   wxString::FormatV(format, argptr), time(NULL));  \
    }                                                               \
  }                                                                 \
                                                                    \
@@ -134,9 +134,9 @@ void wxSafeShowMessage(const wxString& title, const wxString& text)

 // fatal errors can't be suppressed nor handled by the custom log target and
 // always terminate the program
-void wxVLogFatalError(const wxChar *szFormat, va_list argptr)
+void wxVLogFatalError(const wxString& format, va_list argptr)
 {
-    wxSafeShowMessage(_T("Fatal Error"), wxString::FormatV(szFormat, argptr));
+    wxSafeShowMessage(_T("Fatal Error"), wxString::FormatV(format, argptr));

 #ifdef __WXWINCE__
    ExitThread(3);
@@ -157,12 +157,12 @@ void wxDoLogFatalError(const wxChar *szFormat, ...)
 }

 // same as info, but only if 'verbose' mode is on
-void wxVLogVerbose(const wxChar *szFormat, va_list argptr)
+void wxVLogVerbose(const wxString& format, va_list argptr)
 {
    if ( wxLog::IsEnabled() ) {
        if ( wxLog::GetActiveTarget() != NULL && wxLog::GetVerbose() ) {
            wxLog::OnLog(wxLOG_Info,
-                         wxString::FormatV(szFormat, argptr), time(NULL));
+                         wxString::FormatV(format, argptr), time(NULL));
        }
    }
 }
@@ -194,17 +194,17 @@ void wxDoLogVerbose(const wxChar *szFormat, ...)
    va_end(argptr);                                                 \
  }

-  void wxVLogTrace(const wxChar *mask, const wxChar *szFormat, va_list argptr)
+  void wxVLogTrace(const wxString& mask, const wxString& format, va_list argptr)
  {
    if ( wxLog::IsEnabled() && wxLog::IsAllowedTraceMask(mask) ) {
      wxString msg;
-      msg << _T("(") << mask << _T(") ") << wxString::FormatV(szFormat, argptr);
+      msg << _T("(") << mask << _T(") ") << wxString::FormatV(format, argptr);

      wxLog::OnLog(wxLOG_Trace, msg, time(NULL));
    }
  }

-  void wxDoLogTrace(const wxChar *mask, const wxChar *szFormat, ...)
+  void wxDoLogTrace(const wxString& mask, const wxChar *szFormat, ...)
  {
    va_list argptr;
    va_start(argptr, szFormat);
@@ -212,13 +212,13 @@ void wxDoLogVerbose(const wxChar *szFormat, ...)
    va_end(argptr);
  }

-  void wxVLogTrace(wxTraceMask mask, const wxChar *szFormat, va_list argptr)
+  void wxVLogTrace(wxTraceMask mask, const wxString& format, va_list argptr)
  {
    // we check that all of mask bits are set in the current mask, so
    // that wxLogTrace(wxTraceRefCount | wxTraceOle) will only do something
    // if both bits are set.
    if ( wxLog::IsEnabled() && ((wxLog::GetTraceMask() & mask) == mask) ) {
-      wxLog::OnLog(wxLOG_Trace, wxString::FormatV(szFormat, argptr), time(NULL));
+      wxLog::OnLog(wxLOG_Trace, wxString::FormatV(format, argptr), time(NULL));
    }
  }

@@ -246,9 +246,9 @@ static inline wxString wxLogSysErrorHelper(long err)
    return wxString::Format(_(" (error %ld: %s)"), err, wxSysErrorMsg(err));
 }

-void WXDLLEXPORT wxVLogSysError(const wxChar *szFormat, va_list argptr)
+void WXDLLEXPORT wxVLogSysError(const wxString& format, va_list argptr)
 {
-    wxVLogSysError(wxSysErrorCode(), szFormat, argptr);
+    wxVLogSysError(wxSysErrorCode(), format, argptr);
 }

 void WXDLLEXPORT wxDoLogSysError(const wxChar *szFormat, ...)
@@ -259,11 +259,11 @@ void WXDLLEXPORT wxDoLogSysError(const wxChar *szFormat, ...)
    va_end(argptr);
 }

-void WXDLLEXPORT wxVLogSysError(long err, const wxChar *fmt, va_list argptr)
+void WXDLLEXPORT wxVLogSysError(long err, const wxString& format, va_list argptr)
 {
    if ( wxLog::IsEnabled() ) {
        wxLog::OnLog(wxLOG_Error,
-                     wxString::FormatV(fmt, argptr) + wxLogSysErrorHelper(err),
+                     wxString::FormatV(format, argptr) + wxLogSysErrorHelper(err),
                     time(NULL));
    }
 }
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -105,6 +105,427 @@ wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)

 #endif // wxUSE_STD_IOSTREAM

+// ===========================================================================
+// wxString class core
+// ===========================================================================
+
+#if wxUSE_UNICODE_UTF8
+
+// ---------------------------------------------------------------------------
+// UTF-8 operations
+// ---------------------------------------------------------------------------
+
+// IsValidUtf8String() below accepts exactly the byte sequences listed in
+// Table 3.1B from Unicode spec (Legal UTF-8 Byte Sequences):
+//
+//     Code Points    | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
+// -------------------+----------+----------+----------+----------+
+//   U+0000..U+007F   |  00..7F  |          |          |          |
+//   U+0080..U+07FF   |  C2..DF  |  80..BF  |          |          |
+//   U+0800..U+0FFF   |  E0      |  A0..BF  |  80..BF  |          |
+//   U+1000..U+FFFF   |  E1..EF  |  80..BF  |  80..BF  |          |
+//  U+10000..U+3FFFF  |  F0      |  90..BF  |  80..BF  |  80..BF  |
+//  U+40000..U+FFFFF  |  F1..F3  |  80..BF  |  80..BF  |  80..BF  |
+// U+100000..U+10FFFF |  F4      |  80..8F  |  80..BF  |  80..BF  |
+// -------------------+----------+----------+----------+----------+
+
+bool wxString::IsValidUtf8String(const char *str)
+{
+    if ( !str )
+        return true; // NULL is treated as empty string, which is valid UTF-8
+
+    const unsigned char *c = (const unsigned char*)str;
+
+    for ( ; *c; ++c ) // a NUL inside a sequence fails the range checks
+    {
+        unsigned char b = *c;
+
+        if ( b <= 0x7F ) // 00..7F
+            continue;
+
+        else if ( b < 0xC2 ) // invalid lead bytes: 80..C1
+            return false;
+
+        // two-byte sequences:
+        else if ( b <= 0xDF ) // C2..DF
+        {
+            b = *(++c);
+            if ( !(b >= 0x80 && b <= 0xBF ) )
+                return false;
+        }
+
+        // three-byte sequences:
+        else if ( b == 0xE0 ) // 2nd byte is restricted to A0..BF
+        {
+            b = *(++c);
+            if ( !(b >= 0xA0 && b <= 0xBF ) )
+                return false;
+            b = *(++c);
+            if ( !(b >= 0x80 && b <= 0xBF ) )
+                return false;
+        }
+        else if ( b <= 0xEF ) // E1..EF
+        {
+            for ( int i = 0; i < 2; ++i )
+            {
+                b = *(++c);
+                if ( !(b >= 0x80 && b <= 0xBF ) )
+                    return false;
+            }
+        }
+
+        // four-byte sequences:
+        else if ( b == 0xF0 ) // 2nd byte is restricted to 90..BF
+        {
+            b = *(++c);
+            if ( !(b >= 0x90 && b <= 0xBF ) )
+                return false;
+            for ( int i = 0; i < 2; ++i )
+            {
+                b = *(++c);
+                if ( !(b >= 0x80 && b <= 0xBF ) )
+                    return false;
+            }
+        }
+        else if ( b <= 0xF3 ) // F1..F3
+        {
+            for ( int i = 0; i < 3; ++i )
+            {
+                b = *(++c);
+                if ( !(b >= 0x80 && b <= 0xBF ) )
+                    return false;
+            }
+        }
+        else if ( b == 0xF4 ) // 2nd byte is restricted to 80..8F
+        {
+            b = *(++c);
+            if ( !(b >= 0x80 && b <= 0x8F ) )
+                return false;
+            for ( int i = 0; i < 2; ++i )
+            {
+                b = *(++c);
+                if ( !(b >= 0x80 && b <= 0xBF ) )
+                    return false;
+            }
+        }
+        else // otherwise, it's invalid lead byte
+            return false;
+    }
+
+    return true;
+}
+
+#ifdef __WXDEBUG__
+/* static */
+bool wxString::IsValidUtf8LeadByte(unsigned char c)
+{
+    return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
+}
+#endif
+
+unsigned char wxString::ms_utf8IterTable[256] = {
+    // single-byte sequences (ASCII):
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 00..0F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 10..1F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 20..2F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 30..3F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 40..4F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 50..5F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 60..6F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 70..7F
+
+    // these are invalid, we use step 1 to skip
+    // over them (should never happen):
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 80..8F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 90..9F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A0..AF
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B0..BF
+    1, 1,                                            // C0,C1
+
+    // two-byte sequences:
+          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C2..CF
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D0..DF
+
+    // three-byte sequences:
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E0..EF
+
+    // four-byte sequences:
+    4, 4, 4, 4, 4,                                   // F0..F4
+
+    // these are invalid again (5- or 6-byte
+    // sequences and sequences for code points
+    // above U+10FFFF, as restricted by RFC 3629):
+                   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F5..FF
+};
+
+/* static */
+void wxString::DecIter(wxStringImpl::const_iterator& i)
+{
+    wxASSERT( IsValidUtf8LeadByte(*i) );
+
+    // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
+    // binary), so we just have to go back until we hit a byte that is either
+    // < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in binary; this
+    // includes some invalid values, but we can ignore it here, because we
+    // assume valid UTF-8 input for the purpose of efficient implementation).
+    --i;
+    while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
+        --i;
+}
+
+/* static */
+void wxString::DecIter(wxStringImpl::iterator& i)
+{
+    // FIXME-UTF8: use template instead
+    wxASSERT( IsValidUtf8LeadByte(*i) );
+    --i;
+    while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
+        --i;
+}
+
+/* static */
+wxStringImpl::const_iterator
+wxString::AddToIter(wxStringImpl::const_iterator i, int n)
+{
+    wxStringImpl::const_iterator out(i);
+
+    if ( n > 0 )
+    {
+        for ( int j = 0; j < n; ++j )
+            IncIter(out);
+    }
+    else if ( n < 0 )
+    {
+        for ( int j = 0; j > n; --j )
+            DecIter(out);
+    }
+
+    return out;
+}
+
+wxStringImpl::iterator
+wxString::AddToIter(wxStringImpl::iterator i, int n)
+{
+    // FIXME-UTF8: use template instead
+    //
+    // Non-const twin of the const_iterator overload: returns a copy of i
+    // advanced by n characters (n > 0) or moved back by -n characters
+    // (n < 0); for n == 0 the copy is returned unchanged.
+    wxStringImpl::iterator out(i);
+
+    // n is our own copy, so use it directly as the remaining step count
+    for ( ; n > 0; --n )
+        IncIter(out);
+
+    for ( ; n < 0; ++n )
+        DecIter(out);
+
+    return out;
+}
+
+
+/* static */
+int wxString::DiffIters(wxStringImpl::const_iterator i1,
+                        wxStringImpl::const_iterator i2)
+{
+    // Computes "i1 - i2" in characters rather than in bytes: the iterator
+    // that comes first is walked forward with IncIter() until it meets the
+    // other one and the number of steps taken is the magnitude of the
+    // result. Both iterators are assumed to point into the same string.
+    int dist = 0;
+
+    if ( i1 < i2 )
+    {
+        // i1 precedes i2, so the difference is negative
+        for ( ; i1 != i2; IncIter(i1) )
+            --dist;
+    }
+    else if ( i2 < i1 )
+    {
+        // i2 precedes i1, so the difference is positive
+        for ( ; i2 != i1; IncIter(i2) )
+            ++dist;
+    }
+
+    return dist;
+}
+
+int wxString::DiffIters(wxStringImpl::iterator i1, wxStringImpl::iterator i2)
+{
+    // FIXME-UTF8: use template instead
+    //
+    // Signed distance from i2 to i1 counted in characters: negative when i1
+    // comes before i2, positive when it comes after it, 0 when both are
+    // equal.
+    int dist = 0;
+
+    if ( i1 < i2 )
+    {
+        // count the characters between i1 and i2, downwards from 0
+        for ( ; i1 != i2; IncIter(i1) )
+            --dist;
+    }
+    else if ( i2 < i1 )
+    {
+        // count the characters between i2 and i1, upwards from 0
+        for ( ; i2 != i1; IncIter(i2) )
+            ++dist;
+    }
+
+    return dist;
+}
+
+/* static */
+wxString::Utf8CharBuffer wxString::EncodeChar(wxUniChar ch)
+{
+    Utf8CharBuffer buf;
+    char *out = buf.data;
+    wxUniChar::value_type code = ch.GetValue();
+
+    // Encode code as a NUL-terminated UTF-8 sequence of 1 to 4 bytes:
+    //    Char. number range   |        UTF-8 octet sequence
+    //       (hexadecimal)     |              (binary)
+    //   ----------------------+---------------------------------------------
+    //   0000 0000 - 0000 007F | 0xxxxxxx
+    //   0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+    //   0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+    //   0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    //
+    //   Code point value is stored in bits marked with 'x', lowest-order bit
+    //   of the value on the right side in the diagram above.
+    //                                                        (from RFC 3629)
+
+    if ( code <= 0x7F )
+    {
+        out[1] = 0;
+        out[0] = (char)code;
+    }
+    else if ( code <= 0x07FF )
+    {
+        out[2] = 0;
+        // NB: this line takes 6 least significant bits, encodes them as
+        // 10xxxxxx and discards them so that the next byte can be encoded:
+        out[1] = 0x80 | (code & 0x3F);  code >>= 6;
+        out[0] = 0xC0 | code;
+    }
+    else if ( code <= 0xFFFF )  // inclusive: U+FFFF still needs only 3 bytes
+    {
+        out[3] = 0;
+        out[2] = 0x80 | (code & 0x3F);  code >>= 6;
+        out[1] = 0x80 | (code & 0x3F);  code >>= 6;
+        out[0] = 0xE0 | code;
+    }
+    else if ( code <= 0x10FFFF )
+    {
+        out[4] = 0;
+        out[3] = 0x80 | (code & 0x3F);  code >>= 6;
+        out[2] = 0x80 | (code & 0x3F);  code >>= 6;
+        out[1] = 0x80 | (code & 0x3F);  code >>= 6;
+        out[0] = 0xF0 | code;
+    }
+    else
+    {
+        wxFAIL_MSG( _T("trying to encode undefined Unicode character") );
+        out[0] = 0; // an empty string is returned for values above U+10FFFF
+    }
+
+    return buf;
+}
+
+/* static */
+wxUniChar wxUniCharRef::DecodeChar(wxStringImpl::const_iterator i)
+{
+    wxASSERT( wxString::IsValidUtf8LeadByte(*i) ); // FIXME-UTF8: no "wxString::"
+
+    // total number of bytes in the sequence, as announced by its first byte:
+    size_t len = wxString::GetUtf8CharLength(*i);
+    wxASSERT_MSG( len <= 4, _T("invalid UTF-8 sequence length") );
+
+    // Layout of UTF-8 sequences, quoted from RFC 3629:
+    //
+    //    Char. number range   |        UTF-8 octet sequence
+    //       (hexadecimal)     |              (binary)
+    //   ----------------------+---------------------------------------------
+    //   0000 0000 - 0000 007F | 0xxxxxxx
+    //   0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+    //   0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+    //   0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    //
+    //   The 'x' bits hold the code point, least significant bit rightmost.
+
+    // payload ('x') bits of the first byte, indexed by sequence length - 1:
+    static const unsigned char s_payloadMask[4] = { 0x7F, 0x1F, 0x0F, 0x07 };
+#ifdef __WXDEBUG__
+    // marker bits of the first byte and their expected value, same indexing:
+    static const unsigned char s_markerMask[4] = { 0x80, 0xE0, 0xF0, 0xF8 };
+    static const unsigned char s_markerVal[4] =  { 0x00, 0xC0, 0xE0, 0xF0 };
+#endif
+
+    // start with the bits contributed by the first byte:
+    const unsigned char lead = (unsigned char)*i;
+    wxASSERT_MSG( (lead & s_markerMask[len-1]) == s_markerVal[len-1],
+                  _T("invalid UTF-8 lead byte") );
+    wxUniChar::value_type code = lead & s_payloadMask[len-1];
+
+    // every following byte adds six more bits, whatever the sequence length:
+    ++i;
+    while ( len > 1 )
+    {
+        wxASSERT_MSG( ((unsigned char)*i & 0xC0) == 0x80,
+                      _T("invalid UTF-8 byte") );
+        code = (code << 6) | ((unsigned char)*i & 0x3F);
+        --len;
+        ++i;
+    }
+
+    return wxUniChar(code);
+}
+
+/* static */
+wxCharBuffer wxString::EncodeNChars(size_t n, wxUniChar ch)
+{
+    // encode the character just once and then replicate its bytes n times
+    Utf8CharBuffer encoded(EncodeChar(ch));
+    // the first byte of a sequence determines its length; reuse the table
+    // IncIter() relies on instead of counting the bytes:
+    size_t seqLen = ms_utf8IterTable[(unsigned char)encoded.data[0]];
+
+    wxCharBuffer buf(n * seqLen);
+    char *dst = buf.data();
+    for ( size_t left = n; left != 0; --left, dst += seqLen )
+        memcpy(dst, encoded.data, seqLen);
+
+    return buf;
+}
+
+
+void wxString::PosLenToImpl(size_t pos, size_t len,
+                            size_t *implPos, size_t *implLen) const
+{
+    // Maps a (pos, len) pair in characters to its m_impl equivalent:
+    if ( pos == npos )
+        *implPos = npos; // NB: *implLen is left untouched in this case
+    else
+    {
+        const_iterator i = begin() + pos;
+        *implPos = wxStringImpl::const_iterator(i) - m_impl.begin();
+        if ( len == npos )
+            *implLen = npos;
+        else
+        {
+            // too large length is interpreted as "to the end of the string"
+            // FIXME-UTF8: verify this is the case in std::string, assert otherwise
+            const size_t total = length();
+            if ( pos > total || len > total - pos ) // pos + len could wrap
+                len = total - pos;
+            *implLen = wxStringImpl::const_iterator(i + len) -
+                       wxStringImpl::const_iterator(i);
+        }
+    }
+}
+
+#endif // wxUSE_UNICODE_UTF8
+
 // ----------------------------------------------------------------------------
 // wxCStrData converted strings caching
 // ----------------------------------------------------------------------------
@@ -254,14 +675,14 @@ const wchar_t* wxCStrData::AsWChar() const
 // construction and conversion
 // ---------------------------------------------------------------------------

-#if wxUSE_UNICODE
+#if wxUSE_UNICODE_WCHAR
 /* static */
 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
                                               const wxMBConv& conv)
 {
    // anything to do?
    if ( !psz || nLength == 0 )
-        return SubstrBufFromMB();
+        return SubstrBufFromMB(L"", 0);

    if ( nLength == npos )
        nLength = wxNO_LEN;
@@ -269,18 +690,51 @@ wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
    size_t wcLen;
    wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
    if ( !wcLen )
-        return SubstrBufFromMB();
+        return SubstrBufFromMB(_T(""), 0);
    else
        return SubstrBufFromMB(wcBuf, wcLen);
 }
-#else
+#endif // wxUSE_UNICODE_WCHAR
+
+#if wxUSE_UNICODE_UTF8
+/* static */
+wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
+                                               const wxMBConv& conv)
+{
+    // FIXME-UTF8: return as-is without copying under UTF8 locale, return
+    //             converted string under other locales - needs wxCharBuffer
+    //             changes
+
+    // NULL or empty input yields an empty result without any conversion
+    if ( psz == NULL || nLength == 0 )
+        return SubstrBufFromMB("", 0);
+
+    // step 1: decode the input to wide characters using conv (npos is
+    // passed on to the converter as wxNO_LEN)
+    size_t wideLen;
+    wxWCharBuffer wide(
+        conv.cMB2WC(psz, nLength == npos ? wxNO_LEN : nLength, &wideLen));
+    if ( wideLen == 0 )
+        return SubstrBufFromMB("", 0);
+
+    // step 2: re-encode the wide string as UTF-8 via the wide-char overload
+    SubstrBufFromMB utf8(ConvertStr(wide, wideLen, wxConvUTF8));
+
+    // widechar -> UTF-8 conversion isn't supposed to ever fail:
+    wxASSERT_MSG( utf8.data, _T("conversion to UTF-8 failed") );
+
+    return utf8;
+}
+#endif // wxUSE_UNICODE_UTF8
+
+#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 /* static */
 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
                                               const wxMBConv& conv)
 {
    // anything to do?
    if ( !pwz || nLength == 0 )
-        return SubstrBufFromWC();
+        return SubstrBufFromWC("", 0);

    if ( nLength == npos )
        nLength = wxNO_LEN;
@@ -288,34 +742,56 @@ wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLengt
    size_t mbLen;
    wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
    if ( !mbLen )
-        return SubstrBufFromWC();
+        return SubstrBufFromWC("", 0);
    else
        return SubstrBufFromWC(mbBuf, mbLen);
 }
-#endif
+#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE


-#if wxUSE_UNICODE
+#if wxUSE_UNICODE_WCHAR

 //Convert wxString in Unicode mode to a multi-byte string
 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 {
-    return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
+    return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
+}
+
+#elif wxUSE_UNICODE_UTF8
+
+const wxWCharBuffer wxString::wc_str() const
+{
+    // convert UTF-8 to wchar_t; "+ 1" as a size, not a length, is passed
+    const size_t size = m_impl.length() + 1;
+    return wxConvUTF8.cMB2WC(m_impl.c_str(), size, NULL);
+}
+
+const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
+{
+    // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
+    //             under UTF8 locale
+    // FIXME-UTF8: use wc_str() here once we have buffers with length
+
+    // go through wide characters: UTF-8 -> wchar_t first, then wchar_t ->
+    // conv's encoding ("+ 1" below: a size, not a length, is passed)
+    size_t wideLen;
+    const size_t implSize = m_impl.length() + 1;
+    wxWCharBuffer wide(wxConvUTF8.cMB2WC(m_impl.c_str(), implSize, &wideLen));
+    if ( wideLen == 0 )
+        return wxCharBuffer("");
+
+    return conv.cWC2MB(wide, wideLen, NULL);
 }

 #else // ANSI

-#if wxUSE_WCHAR_T
-
 //Converts this string to a wide character string if unicode
 //mode is not enabled and wxUSE_WCHAR_T is enabled
 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 {
-    return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
+    return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 }

-#endif // wxUSE_WCHAR_T
-
 #endif // Unicode/ANSI

 // shrink to minimal size (releasing extra memory)
@@ -996,7 +1472,8 @@ bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
    wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );

    int start = length() - wxStrlen(suffix);
-    if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 )
+
+    if ( start < 0 || compare(start, npos, suffix) != 0 )
        return false;

    if ( rest )
@@ -1420,7 +1897,7 @@ int wxString::PrintfV(const wxString& format, va_list argptr)
        // only a copy
        va_list argptrcopy;
        wxVaCopy(argptrcopy, argptr);
-        int len = wxVsnprintf(buf, size, format, argptrcopy);
+        int len = wxVsnprintf(buf, size, (const wxChar*)/*FIXME-UTF8*/format, argptrcopy);
        va_end(argptrcopy);

        // some implementations of vsnprintf() don't NUL terminate
--- a/src/common/stringimpl.cpp
+++ b/src/common/stringimpl.cpp
@@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////////
-// Name:        src/common/string.cpp
+// Name:        src/common/stringimpl.cpp
 // Purpose:     wxString class
 // Author:      Vadim Zeitlin, Ryan Norton
 // Modified by:
@@ -56,12 +56,10 @@
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
-    #define wxStringStrlen   strlen
 #else
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
-    #define wxStringStrlen   wxStrlen
 #endif


@@ -80,6 +78,10 @@ const size_t wxStringImpl::npos = (size_t) -1;

 #if wxUSE_STL_BASED_WXSTRING

+// FIXME-UTF8: get rid of this, have only one wxEmptyString
+#if wxUSE_UNICODE_UTF8
+extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = "";
+#endif
 extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");

 #else
@@ -90,11 +92,17 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
 static const struct
 {
  wxStringData data;
-  wxChar dummy;
+  wxStringCharType dummy;
 } g_strEmpty = { {-1, 0, 0}, wxT('\0') };

 // empty C style string: points to 'string data' byte of g_strEmpty
-extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
+#if wxUSE_UNICODE_UTF8
+// FIXME-UTF8: get rid of this, have only one wxEmptyString
+extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = &g_strEmpty.dummy;
+extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
+#else
+extern const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
+#endif

 #endif

@@ -111,7 +119,7 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
  class Averager
  {
  public:
-    Averager(const wxChar *sz) { m_sz = sz; m_nTotal = m_nCount = 0; }
+    Averager(const wxStringCharType *sz) { m_sz = sz; m_nTotal = m_nCount = 0; }
   ~Averager()
   { wxPrintf("wxString: average %s = %f\n", m_sz, ((float)m_nTotal)/m_nCount); }

@@ -119,7 +127,7 @@ extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;

  private:
    size_t m_nCount, m_nTotal;
-    const wxChar *m_sz;
+    const wxStringCharType *m_sz;
  } g_averageLength("allocation size"),
    g_averageSummandLength("summand length"),
    g_averageConcatHit("hit probability in concat"),
@@ -147,15 +155,16 @@ void wxStringData::Free()
 // ===========================================================================

 // takes nLength elements of psz starting at nPos
-void wxStringImpl::InitWith(const wxChar *psz, size_t nPos, size_t nLength)
+void wxStringImpl::InitWith(const wxStringCharType *psz,
+                            size_t nPos, size_t nLength)
 {
  Init();

  // if the length is not given, assume the string to be NUL terminated
  if ( nLength == npos ) {
-    wxASSERT_MSG( nPos <= wxStrlen(psz), _T("index out of bounds") );
+    wxASSERT_MSG( nPos <= Strsize(psz), _T("index out of bounds") );

-    nLength = wxStrlen(psz + nPos);
+    nLength = Strsize(psz + nPos);
  }

  STATISTICS_ADD(InitialLength, nLength);
@@ -201,7 +210,7 @@ bool wxStringImpl::AllocBuffer(size_t nLen)
  wxASSERT( nLen >  0 );

  // make sure that we don't overflow
-  wxASSERT( nLen < (INT_MAX / sizeof(wxChar)) -
+  wxASSERT( nLen < (INT_MAX / sizeof(wxStringCharType)) -
                   (sizeof(wxStringData) + EXTRA_ALLOC + 1) );

  STATISTICS_ADD(Length, nLen);
@@ -210,7 +219,7 @@ bool wxStringImpl::AllocBuffer(size_t nLen)
  // 1) one extra character for '\0' termination
  // 2) sizeof(wxStringData) for housekeeping info
  wxStringData* pData = (wxStringData*)
-    malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxChar));
+    malloc(sizeof(wxStringData) + (nLen + EXTRA_ALLOC + 1)*sizeof(wxStringCharType));

  if ( pData == NULL ) {
    // allocation failures are handled by the caller
@@ -269,7 +278,8 @@ bool wxStringImpl::AllocBeforeWrite(size_t nLen)
      nLen += EXTRA_ALLOC;

      pData = (wxStringData*)
-          realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+          realloc(pData,
+                  sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));

      if ( pData == NULL ) {
        // allocation failures are handled by the caller
@@ -331,7 +341,7 @@ bool wxStringImpl::Alloc(size_t nLen)
      nLen += EXTRA_ALLOC;

      pData = (wxStringData *)
-                malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+             malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));

      if ( pData == NULL ) {
        // allocation failure handled by caller
@@ -352,14 +362,14 @@ bool wxStringImpl::Alloc(size_t nLen)
        return false;
      }
      // +1 to copy the terminator, too
-      memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxChar));
+      memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxStringCharType));
      GetStringData()->nDataLength = nOldLen;
    }
    else {
      nLen += EXTRA_ALLOC;

      pData = (wxStringData *)
-        realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+        realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxStringCharType));

      if ( pData == NULL ) {
        // allocation failure handled by caller
@@ -411,11 +421,12 @@ wxStringImpl& wxStringImpl::erase(size_t nStart, size_t nLen)
    return *this;
 }

-wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n)
+wxStringImpl& wxStringImpl::insert(size_t nPos,
+                                   const wxStringCharType *sz, size_t n)
 {
    wxASSERT( nPos <= length() );

-    if ( n == npos ) n = wxStrlen(sz);
+    if ( n == npos ) n = Strsize(sz);
    if ( n == 0 ) return *this;

    if ( !Alloc(length() + n) || !CopyBeforeWrite() ) {
@@ -424,8 +435,8 @@ wxStringImpl& wxStringImpl::insert(size_t nPos, const wxChar *sz, size_t n)
    }

    memmove(m_pchData + nPos + n, m_pchData + nPos,
-            (length() - nPos) * sizeof(wxChar));
-    memcpy(m_pchData + nPos, sz, n * sizeof(wxChar));
+            (length() - nPos) * sizeof(wxStringCharType));
+    memcpy(m_pchData + nPos, sz, n * sizeof(wxStringCharType));
    GetStringData()->nDataLength = length() + n;
    m_pchData[length()] = '\0';

@@ -487,7 +498,8 @@ size_t wxStringImpl::find(const wxStringImpl& str, size_t nStart) const
    return p - c_str() + nLenOther <= nLen ? p - c_str() : npos;
 }

-size_t wxStringImpl::find(const wxChar* sz, size_t nStart, size_t n) const
+size_t wxStringImpl::find(const wxStringCharType* sz,
+                          size_t nStart, size_t n) const
 {
    return find(wxStringImpl(sz, n), nStart);
 }
@@ -534,7 +546,8 @@ size_t wxStringImpl::rfind(const wxStringImpl& str, size_t nStart) const
    return npos;
 }

-size_t wxStringImpl::rfind(const wxChar* sz, size_t nStart, size_t n) const
+size_t wxStringImpl::rfind(const wxStringCharType* sz,
+                           size_t nStart, size_t n) const
 {
    return rfind(wxStringImpl(sz, n), nStart);
 }
@@ -562,7 +575,7 @@ size_t wxStringImpl::rfind(wxStringCharType ch, size_t nStart) const
 }

 wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    const wxChar *sz)
+                                    const wxStringCharType *sz)
 {
  wxASSERT_MSG( nStart <= length(),
                _T("index out of bounds in wxStringImpl::replace") );
@@ -607,7 +620,7 @@ wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
 }

 wxStringImpl& wxStringImpl::replace(size_t nStart, size_t nLen,
-                                    const wxChar* sz, size_t nCount)
+                                    const wxStringCharType* sz, size_t nCount)
 {
  return replace(nStart, nLen, wxStringImpl(sz, nCount).c_str());
 }
@@ -643,24 +656,25 @@ wxStringImpl& wxStringImpl::operator=(const wxStringImpl& stringSrc)
 // assigns a single character
 wxStringImpl& wxStringImpl::operator=(wxStringCharType ch)
 {
-  wxChar c(ch);
+  wxStringCharType c(ch);
  if ( !AssignCopy(1, &c) ) {
-    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxChar)") );
+    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxStringCharType)") );
  }
  return *this;
 }

 // assigns C string
-wxStringImpl& wxStringImpl::operator=(const wxChar *psz)
+wxStringImpl& wxStringImpl::operator=(const wxStringCharType *psz)
 {
-  if ( !AssignCopy(wxStrlen(psz), psz) ) {
-    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxChar *)") );
+  if ( !AssignCopy(Strsize(psz), psz) ) {
+    wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxStringCharType *)") );
  }
  return *this;
 }

 // helper function: does real copy
-bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
+bool wxStringImpl::AssignCopy(size_t nSrcLen,
+                              const wxStringCharType *pszSrcData)
 {
  if ( nSrcLen == 0 ) {
    Reinit();
@@ -670,7 +684,7 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
      // allocation failure handled by caller
      return false;
    }
-    memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxChar));
+    memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxStringCharType));
    GetStringData()->nDataLength = nSrcLen;
    m_pchData[nSrcLen] = wxT('\0');
  }
@@ -682,7 +696,8 @@ bool wxStringImpl::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
 // ---------------------------------------------------------------------------

 // add something to this string
-bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
+bool wxStringImpl::ConcatSelf(size_t nSrcLen,
+                              const wxStringCharType *pszSrcData,
                              size_t nMaxLen)
 {
  STATISTICS_ADD(SummandLength, nSrcLen);
@@ -705,7 +720,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
          // allocation failure handled by caller
          return false;
      }
-      memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxChar));
+      memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxStringCharType));
      pOldData->Unlock();
    }
    else if ( nNewLen > pData->nAllocLength ) {
@@ -728,7 +743,7 @@ bool wxStringImpl::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
    wxASSERT( nNewLen <= GetStringData()->nAllocLength );

    // fast concatenation - all is done in our buffer
-    memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxChar));
+    memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxStringCharType));

    m_pchData[nNewLen] = wxT('\0');          // put terminating '\0'
    GetStringData()->nDataLength = nNewLen; // and fix the length
@@ -755,7 +770,7 @@ wxChar *wxStringImpl::DoGetWriteBuf(size_t nLen)
 // put string back in a reasonable state after GetWriteBuf
 void wxStringImpl::DoUngetWriteBuf()
 {
-  DoUngetWriteBuf(wxStrlen(m_pchData));
+  DoUngetWriteBuf(Strsize(m_pchData));
 }

 void wxStringImpl::DoUngetWriteBuf(size_t nLen)
--- a/src/common/strvararg.cpp
+++ b/src/common/strvararg.cpp
@@ -32,20 +32,30 @@
 // implementation
 // ============================================================================

-const wxStringCharType *wxArgNormalizer<const wxCStrData&>::get() const
+const wxChar *wxArgNormalizer<const wxCStrData&>::get() const
 {
+    // FIXME-UTF8: use some way that doesn't involve implicit conversion,
+    //             so that we deallocate any converted buffer immediately;
+    //             can't use AsString() because it returns wxString and not
+    //             const wxString&, unfortunately; use As[W]CharBuf() when
+    //             available.
    return m_value;
 }

-const wxStringCharType *wxArgNormalizer<const wxString&>::get() const
+const wxChar *wxArgNormalizer<const wxString&>::get() const
 {
+#if wxUSE_UNICODE_UTF8 // FIXME-UTF8
+    return (const wxChar*)m_value;
+#else
    return m_value.wx_str();
+#endif
 }

-#if wxUSE_UNICODE_WCHAR
-
+#if wxUSE_UNICODE // FIXME-UTF8: should be wxUSE_UNICODE_WCHAR
 wxArgNormalizer<const char*>::wxArgNormalizer(const char *value)
 {
+    // FIXME-UTF8: move this to the header so that m_value doesn't have
+    //             to be dynamically allocated
    m_value = new wxWCharBuffer(wxConvLibc.cMB2WC(value));
 }

@@ -58,12 +68,17 @@ const wchar_t *wxArgNormalizer<const char*>::get() const
 {
    return m_value->data();
 }
+#endif // wxUSE_UNICODE_WCHAR

-#elif wxUSE_WCHAR_T // !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T

+#if /*wxUSE_UNICODE_UTF8 ||*/ !wxUSE_UNICODE // FIXME-UTF8
 wxArgNormalizer<const wchar_t*>::wxArgNormalizer(const wchar_t *value)
 {
+#if wxUSE_UNICODE_UTF8 // FIXME-UTF8: this will be the only case
+    m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(value));
+#else
    m_value = new wxCharBuffer(wxConvLibc.cWC2MB(value));
+#endif
 }

 wxArgNormalizer<const wchar_t*>::~wxArgNormalizer()
@@ -75,12 +90,44 @@ const char *wxArgNormalizer<const wchar_t*>::get() const
 {
    return m_value->data();
 }
+#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
+
+#if 0 // wxUSE_UNICODE_UTF8 - FIXME-UTF8
+wxArgNormalizer<const char*>::wxArgNormalizer(const char *value)
+{
+    // FIXME-UTF8: move this to the header so that m_value doesn't have
+    //             to be dynamically allocated
+    // FIXME-UTF8: optimize this if current locale is UTF-8 one
+
+    // convert to widechar string first (with wxConvLibc):
+    wxWCharBuffer buf(wxConvLibc.cMB2WC(value));
+
+    if ( buf )
+    {
+        // then to UTF-8; the input must be the wide buffer, not value itself:
+        m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(buf));
+    }
+    else
+    {
+        m_value = new wxCharBuffer(); // no wide string was produced
+    }
+}
+
+wxArgNormalizer<const char*>::~wxArgNormalizer()
+{
+    delete m_value; // allocated with new in the constructor
+}
+
+const char *wxArgNormalizer<const char*>::get() const
+{
+    return m_value->data(); // the buffer built by the constructor
+}
+#endif // wxUSE_UNICODE_UTF8
+

-#endif // wxUSE_UNICODE_WCHAR / !wxUSE_UNICODE_WCHAR && wxUSE_WCHAR_T

 // FIXME-UTF8: move this to the header once it's possible to include buffer.h
 //             without including wxcrt.h
-
 wxArgNormalizer<wxCharBuffer>::wxArgNormalizer(const wxCharBuffer& buf)
    : wxArgNormalizer<const char*>(buf.data())
 {
--- a/src/common/unichar.cpp
+++ b/src/common/unichar.cpp
@@ -25,10 +25,17 @@

 #include "wx/unichar.h"

+// FIXME-UTF8: remove once UTF-8 functions moved outside
+#include "wx/string.h"
+
 // ===========================================================================
 // implementation
 // ===========================================================================

+// ---------------------------------------------------------------------------
+// wxUniChar
+// ---------------------------------------------------------------------------
+
 /* static */
 wxUniChar::value_type wxUniChar::From8bit(char c)
 {
@@ -55,3 +62,35 @@ char wxUniChar::To8bit(wxUniChar::value_type c)
        return '?'; // FIXME-UTF8: what to use as failure character?
    return buf[0];
 }
+
+
+// ---------------------------------------------------------------------------
+// wxUniCharRef
+// ---------------------------------------------------------------------------
+
+#if wxUSE_UNICODE_UTF8
+wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
+{
+    // compare the encoded sizes of the character being replaced and of c
+    wxString::Utf8CharBuffer utf(wxString::EncodeChar(c));
+    size_t lenOld = wxString::GetUtf8CharLength(*m_pos);
+    size_t lenNew = wxString::GetUtf8CharLength(utf[0]);
+
+    if ( lenNew != lenOld )
+    {
+        // sizes differ: go through replace(), remembering our offset first
+        // as this is needed to keep m_pos valid afterwards
+        size_t idx = m_pos - m_str.begin();
+        m_str.replace(m_pos, m_pos + lenOld, utf, lenNew);
+        m_pos = m_str.begin() + idx;
+        return *this;
+    }
+
+    // same size: simply overwrite the old bytes in place
+    iterator pos(m_pos);
+    for ( size_t i = 0; i != lenNew; ++i, ++pos )
+        *pos = utf[i];
+
+    return *this;
+}
+#endif // wxUSE_UNICODE_UTF8
--- a/src/common/uri.cpp
+++ b/src/common/uri.cpp
@@ -641,7 +641,8 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali
        if (bNormalize)
        {
            wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
-#if wxUSE_STL
+#if wxUSE_STL || wxUSE_UNICODE_UTF8
+            // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
            wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 #endif
            Normalize(theBuffer, true);
@@ -693,7 +694,8 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali
            if (bNormalize)
            {
                wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
-#if wxUSE_STL
+#if wxUSE_STL || wxUSE_UNICODE_UTF8
+                // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
                wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 #endif
                Normalize(theBuffer);