Use tableUtf8Lengths[] in stringops.cpp too
This commit is contained in:
@@ -94,15 +94,15 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
|
|||||||
return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
|
return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
|
||||||
}
|
}
|
||||||
|
|
||||||
// table of offsets to skip forward when iterating over UTF-8 sequence
|
// returns offset to skip forward when iterating over UTF-8 sequence
|
||||||
static const unsigned char ms_utf8IterTable[256];
|
static unsigned char GetUTF8IterOffset(unsigned char c);
|
||||||
|
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
static void IncIter(Iterator& i)
|
static void IncIter(Iterator& i)
|
||||||
{
|
{
|
||||||
wxASSERT( IsValidUtf8LeadByte(*i) );
|
wxASSERT( IsValidUtf8LeadByte(*i) );
|
||||||
i += ms_utf8IterTable[(unsigned char)*i];
|
i += GetUTF8IterOffset(*i);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
@@ -178,7 +178,7 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
|
|||||||
static size_t GetUtf8CharLength(char c)
|
static size_t GetUtf8CharLength(char c)
|
||||||
{
|
{
|
||||||
wxASSERT( IsValidUtf8LeadByte(c) );
|
wxASSERT( IsValidUtf8LeadByte(c) );
|
||||||
return ms_utf8IterTable[(unsigned char)c];
|
return GetUTF8IterOffset(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
// decodes single UTF-8 character from UTF-8 string
|
// decodes single UTF-8 character from UTF-8 string
|
||||||
|
@@ -23,6 +23,8 @@
|
|||||||
#include "wx/stringops.h"
|
#include "wx/stringops.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "wx/private/unicode.h"
|
||||||
|
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
// implementation
|
// implementation
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
@@ -97,40 +99,13 @@ wxWxCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar&
|
|||||||
// UTF-8 sequences lengths
|
// UTF-8 sequences lengths
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
const unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
|
unsigned char wxStringOperationsUtf8::GetUTF8IterOffset(unsigned char c)
|
||||||
// single-byte sequences (ASCII):
|
{
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
|
unsigned char l = tableUtf8Lengths[c]; // table entry for byte c
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
|
if ( !l ) // zero entry: treated as an invalid byte, step by 1 to skip over it
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
|
l = 1;
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
|
return l;
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
|
}
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
|
|
||||||
|
|
||||||
// these are invalid, we use step 1 to skip
|
|
||||||
// over them (should never happen):
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8F
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90..9F
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0..AF
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0..BF
|
|
||||||
1, 1, // C0,C1
|
|
||||||
|
|
||||||
// two-byte sequences:
|
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF
|
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
|
|
||||||
|
|
||||||
// three-byte sequences:
|
|
||||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
|
|
||||||
|
|
||||||
// four-byte sequences:
|
|
||||||
4, 4, 4, 4, 4, // F0..F4
|
|
||||||
|
|
||||||
// these are invalid again (5- or 6-byte
|
|
||||||
// sequences and sequences for code points
|
|
||||||
// above U+10FFFF, as restricted by RFC 3629):
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F5..FF
|
|
||||||
};
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// UTF-8 operations
|
// UTF-8 operations
|
||||||
@@ -166,7 +141,7 @@ bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len)
|
|||||||
{
|
{
|
||||||
// if the string is not NULL-terminated, verify we have enough
|
// if the string is not NULL-terminated, verify we have enough
|
||||||
// bytes in it left for current character's encoding:
|
// bytes in it left for current character's encoding:
|
||||||
if ( c + ms_utf8IterTable[*c] > end )
|
if ( c + GetUTF8IterOffset(*c) > end )
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -364,7 +339,7 @@ wxCharBuffer wxStringOperationsUtf8::EncodeNChars(size_t n, const wxUniChar& ch)
|
|||||||
{
|
{
|
||||||
Utf8CharBuffer once(EncodeChar(ch));
|
Utf8CharBuffer once(EncodeChar(ch));
|
||||||
// the IncIter() table can be used to determine the length of ch's encoding:
|
// the iteration offset of the first byte gives the length of ch's encoding:
|
||||||
size_t len = ms_utf8IterTable[(unsigned char)once.data[0]];
|
size_t len = GetUTF8IterOffset(once.data[0]);
|
||||||
|
|
||||||
wxCharBuffer buf(n * len);
|
wxCharBuffer buf(n * len);
|
||||||
char *ptr = buf.data();
|
char *ptr = buf.data();
|
||||||
|
Reference in New Issue
Block a user