Use tableUtf8Lengths[] in stringops.cpp too

This commit is contained in:
Pavel Tyunin
2020-10-07 15:41:15 +03:00
parent 240fcee90e
commit b536457e07
2 changed files with 15 additions and 40 deletions

View File

@@ -94,15 +94,15 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
}
// table of offsets to skip forward when iterating over UTF-8 sequence
static const unsigned char ms_utf8IterTable[256];
// returns offset to skip forward when iterating over UTF-8 sequence
static unsigned char GetUTF8IterOffset(unsigned char c);
template<typename Iterator>
static void IncIter(Iterator& i)
{
wxASSERT( IsValidUtf8LeadByte(*i) );
i += ms_utf8IterTable[(unsigned char)*i];
i += GetUTF8IterOffset(*i);
}
template<typename Iterator>
@@ -178,7 +178,7 @@ struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
static size_t GetUtf8CharLength(char c)
{
wxASSERT( IsValidUtf8LeadByte(c) );
return ms_utf8IterTable[(unsigned char)c];
return GetUTF8IterOffset(c);
}
// decodes single UTF-8 character from UTF-8 string

View File

@@ -23,6 +23,8 @@
#include "wx/stringops.h"
#endif
#include "wx/private/unicode.h"
// ===========================================================================
// implementation
// ===========================================================================
@@ -97,40 +99,13 @@ wxWxCharBuffer wxStringOperationsWchar::EncodeNChars(size_t n, const wxUniChar&
// UTF-8 sequences lengths
// ---------------------------------------------------------------------------
// Lookup table indexed by byte value (0x00..0xFF) giving the number of bytes
// to advance when that byte is the first byte of a UTF-8 sequence: 1 for
// ASCII, 2..4 for the lead bytes of multi-byte sequences, and 1 for every
// byte that is not a valid lead byte, so that stepping always moves forward
// by at least one byte.
const unsigned char wxStringOperationsUtf8::ms_utf8IterTable[256] = {
// single-byte sequences (ASCII):
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
// these are invalid, we use step 1 to skip
// over them (should never happen):
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90..9F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0..AF
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0..BF
1, 1, // C0,C1
// two-byte sequences:
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
// three-byte sequences:
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
// four-byte sequences:
4, 4, 4, 4, 4, // F0..F4
// these are invalid again (5- or 6-byte
// sequences and sequences for code points
// above U+10FFFF, as restricted by RFC 3629):
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F5..FF
};
// Returns the number of bytes to step forward when c is the first byte of a
// UTF-8 sequence.
//
// The value is read from tableUtf8Lengths[]; an entry of 0 is replaced by 1 so
// that a caller iterating over a string skips an invalid byte instead of
// getting stuck on it.
//
// Note: "static" must not be repeated here. The specifier is only allowed on
// the in-class declaration of a static member function; putting it on the
// out-of-class definition is ill-formed and fails to compile.
unsigned char wxStringOperationsUtf8::GetUTF8IterOffset(unsigned char c)
{
    const unsigned char len = tableUtf8Lengths[c];

    // skip over invalid characters one byte at a time
    return len ? len : 1;
}
// ---------------------------------------------------------------------------
// UTF-8 operations
@@ -166,7 +141,7 @@ bool wxStringOperationsUtf8::IsValidUtf8String(const char *str, size_t len)
{
// if the string is not NULL-terminated, verify we have enough
// bytes in it left for current character's encoding:
if ( c + ms_utf8IterTable[*c] > end )
if ( c + GetUTF8IterOffset(*c) > end )
return false;
}
@@ -364,7 +339,7 @@ wxCharBuffer wxStringOperationsUtf8::EncodeNChars(size_t n, const wxUniChar& ch)
{
Utf8CharBuffer once(EncodeChar(ch));
// the IncIter() table can be used to determine the length of ch's encoding:
size_t len = ms_utf8IterTable[(unsigned char)once.data[0]];
size_t len = GetUTF8IterOffset(once.data[0]);
wxCharBuffer buf(n * len);
char *ptr = buf.data();