Use tableUtf8Lengths[] in stringops.cpp too
This commit is contained in:
@@ -94,15 +94,15 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
|
||||
return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
|
||||
}
|
||||
|
||||
// table of offsets to skip forward when iterating over UTF-8 sequence
|
||||
static const unsigned char ms_utf8IterTable[256];
|
||||
// returns offset to skip forward when iterating over UTF-8 sequence
|
||||
static unsigned char GetUTF8IterOffset(unsigned char c);
|
||||
|
||||
|
||||
// Advances iterator i past the UTF-8 sequence it currently points to.
//
// *i is expected to be a valid UTF-8 lead byte (checked with wxASSERT); the
// number of bytes to skip is obtained from GetUTF8IterOffset().
template<typename Iterator>
static void IncIter(Iterator& i)
{
    wxASSERT( IsValidUtf8LeadByte(*i) );

    // Advance exactly once. Stepping by the ms_utf8IterTable entry and then
    // again by GetUTF8IterOffset() would skip two sequences and make the
    // second lookup read a byte that was never checked by the assert above.
    i += GetUTF8IterOffset(*i);
}
|
||||
|
||||
template<typename Iterator>
|
||||
@@ -178,7 +178,7 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
|
||||
static size_t GetUtf8CharLength(char c)
|
||||
{
|
||||
wxASSERT( IsValidUtf8LeadByte(c) );
|
||||
return ms_utf8IterTable[(unsigned char)c];
|
||||
return GetUTF8IterOffset(c);
|
||||
}
|
||||
|
||||
// decodes single UTF-8 character from UTF-8 string
|
||||
|
@@ -23,6 +23,8 @@
|
||||
#include "wx/stringops.h"
|
||||
#endif
|
||||
|
||||
#include "wx/private/unicode.h"
|
||||
|
||||
// ===========================================================================
|
||||
// implementation
|
||||
// ===========================================================================
|
||||
@@ -97,40 +99,13 @@ wxWxCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar&
|
||||
// UTF-8 sequences lengths
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Step table indexed by byte value: entry [b] is the number of bytes to
// advance when b is the first byte at the current position. Valid lead
// bytes map to the length of their sequence (1 to 4); every other byte
// maps to 1 so that iteration still moves forward.
// NOTE(review): GetUTF8IterOffset() computes an equivalent offset from
// tableUtf8Lengths[] -- confirm whether this table is still needed.
const unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
|
||||
// single-byte sequences (ASCII):
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
|
||||
|
||||
// these are invalid, we use step 1 to skip
|
||||
// over them (should never happen):
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90..9F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0..AF
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0..BF
|
||||
1, 1, // C0,C1
|
||||
|
||||
// two-byte sequences:
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
|
||||
|
||||
// three-byte sequences:
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
|
||||
|
||||
// four-byte sequences:
|
||||
4, 4, 4, 4, 4, // F0..F4
|
||||
|
||||
// these are invalid again (5- or 6-byte
|
||||
// sequences and sequences for code points
|
||||
// above U+10FFFF, as restricted by RFC 3629):
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F5..FF
|
||||
};
|
||||
static unsigned char wxStringOperationsUtf8::GetUTF8IterOffset(unsigned char c)
|
||||
{
|
||||
unsigned char l = tableUtf8Lengths[c];
|
||||
if ( !l ) //skip over invalid characters
|
||||
l = 1;
|
||||
return l;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// UTF-8 operations
|
||||
@@ -166,7 +141,7 @@ bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len)
|
||||
{
|
||||
// if the string is not NULL-terminated, verify we have enough
|
||||
// bytes in it left for current character's encoding:
|
||||
if ( c + ms_utf8IterTable[*c] > end )
|
||||
if ( c + GetUTF8IterOffset(*c) > end )
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -364,7 +339,7 @@ wxCharBuffer wxStringOperationsUtf8::EncodeNChars(size_t n, const wxUniChar& ch)
|
||||
{
|
||||
Utf8CharBuffer once(EncodeChar(ch));
|
||||
// the IncIter() table can be used to determine the length of ch's encoding:
|
||||
size_t len = ms_utf8IterTable[(unsigned char)once.data[0]];
|
||||
size_t len = GetUTF8IterOffset(once.data[0]);
|
||||
|
||||
wxCharBuffer buf(n * len);
|
||||
char *ptr = buf.data();
|
||||
|
Reference in New Issue
Block a user