sgml: add sgmlerr
Signed-off-by: Simon Rozman <simon@rozman.si>
This commit is contained in:
parent
fdf16a65b6
commit
d981225ba8
@ -78,6 +78,79 @@ namespace stdex
|
||||
/// Bitwise OR of the sgml_amp, sgml_bsol and sgml_quot_apos flags.
constexpr int sgml_c = sgml_amp | sgml_bsol | sgml_quot_apos;
|
||||
// constexpr int sgml_kolos = sgml_amp | sgml_quot | sgml_dollar | sgml_percnt | sgml_lt_gt | sgml_bsol/* | sgml_commat | sgml_num*/ | sgml_lpar_rpar | sgml_lcub_rcub | sgml_lsqb_rsqb;
|
||||
|
||||
///
|
||||
/// Checks SGML string for error
|
||||
///
|
||||
/// \param[in] src SGML string
|
||||
/// \param[in] count_src SGML string character count limit
|
||||
/// \param[in] what Bitwise flag of stdex::sgml_* constants that force extra checks. Currently, only stdex::sgml_full is used, which enforces 7-bit/ASCII checking.
|
||||
///
|
||||
/// \return Index of error; or stdex::npos if no error detected.
|
||||
///
|
||||
template <class T_from>
|
||||
size_t sgmlerr(
|
||||
_In_reads_or_z_opt_(count_src) const T_from* src, _In_ size_t count_src,
|
||||
_In_ int what = 0)
|
||||
{
|
||||
_Assume_(src || !count_src);
|
||||
|
||||
const bool
|
||||
do_ascii = (what & sgml_full) == 0;
|
||||
|
||||
for (size_t i = 0; i < count_src && src[i];) {
|
||||
if (src[i] == '&') {
|
||||
auto end = sgmlend(src + i + 1, count_src - i - 1);
|
||||
if (end) {
|
||||
const wchar_t* entity_w;
|
||||
wchar_t chr[3];
|
||||
size_t n = end - src - i - 1;
|
||||
if (n >= 2 && src[i + 1] == '#') {
|
||||
uint32_t unicode;
|
||||
if (src[i + 2] == 'x' || src[i + 2] == 'X')
|
||||
unicode = strtou32(src + i + 3, n - 2, nullptr, 16);
|
||||
else
|
||||
unicode = strtou32(src + i + 2, n - 1, nullptr, 10);
|
||||
#ifdef _WIN32
|
||||
if (unicode < 0x10000) {
|
||||
chr[0] = (wchar_t)unicode;
|
||||
chr[1] = 0;
|
||||
}
|
||||
else {
|
||||
ucs4_to_surrogate_pair(chr, unicode);
|
||||
chr[2] = 0;
|
||||
}
|
||||
#else
|
||||
chr[0] = (wchar_t)unicode;
|
||||
chr[1] = 0;
|
||||
#endif
|
||||
entity_w = chr;
|
||||
}
|
||||
else
|
||||
entity_w = sgml2uni(src + i + 1, n);
|
||||
|
||||
if (entity_w) {
|
||||
i = end - src + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unknown entity.
|
||||
return i;
|
||||
}
|
||||
|
||||
// Unterminated entity.
|
||||
return i;
|
||||
}
|
||||
|
||||
if (do_ascii && !is7bit(src[i])) {
|
||||
// Non-ASCII character
|
||||
return i;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
return npos;
|
||||
}
|
||||
|
||||
///
|
||||
/// Convert SGML string to Unicode (UTF-16 on Windows) and append to string
|
||||
///
|
||||
|
Loading…
x
Reference in New Issue
Block a user