From af6ef9555a93ee8a7c9396a466a881c2029cba58 Mon Sep 17 00:00:00 2001 From: Karl Waclawek Date: Mon, 2 May 2016 00:02:44 +0200 Subject: [PATCH 1/8] Fix overflow (v2) (Some post-processing by Sebastian Pipping) --- src/expat/lib/xmlparse.c | 7 +++-- src/expat/lib/xmltok.c | 20 ++++++------ src/expat/lib/xmltok_impl.c | 62 ++++++++++++++++++------------------- 3 files changed, 47 insertions(+), 42 deletions(-) diff --git a/src/expat/lib/xmlparse.c b/src/expat/lib/xmlparse.c index dbf5687613..974db712f4 100644 --- a/src/expat/lib/xmlparse.c +++ b/src/expat/lib/xmlparse.c @@ -5372,7 +5372,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc, *eventEndPP = s; defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); *eventPP = s; - } while (s != end); + } while (s < end); } else defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); @@ -6174,12 +6174,15 @@ static XML_Char * poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { + ICHAR* poolPtrPrev = NULL; if (!pool->ptr && !poolGrow(pool)) return NULL; for (;;) { XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); - if (ptr == end) + /* complete or zero progress? */ + if (ptr == end || pool->ptr == poolPtrPrev) break; + poolPtrPrev = pool->ptr; if (!poolGrow(pool)) return NULL; } diff --git a/src/expat/lib/xmltok.c b/src/expat/lib/xmltok.c index c3f1c03384..a29f241cb6 100644 --- a/src/expat/lib/xmltok.c +++ b/src/expat/lib/xmltok.c @@ -335,7 +335,7 @@ utf8_toUtf8(const ENCODING *enc, if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) break; } - for (to = *toP, from = *fromP; from != fromLim; from++, to++) + for (to = *toP, from = *fromP; from < fromLim; from++, to++) *to = *from; *fromP = from; *toP = to; @@ -348,7 +348,7 @@ utf8_toUtf16(const ENCODING *enc, { unsigned short *to = *toP; const char *from = *fromP; - while (from != fromLim && to != toLim) { + while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); @@ -459,7 +459,7 @@ latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; } @@ -492,7 +492,7 @@ ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; } @@ -545,8 +545,9 @@ E ## toUtf8(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ char **toP, const char *toLim) \ { \ - const char *from; \ - for (from = *fromP; from != fromLim; from += 2) { \ + const char *from = *fromP; \ + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ + for (; from < fromLim; from += 2) { \ int plane; \ unsigned char lo2; \ unsigned char lo = GET_LO(from); \ @@ -608,11 +609,12 @@ E ## toUtf16(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ unsigned short **toP, const unsigned short *toLim) \ { \ + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ fromLim -= 2; \ - for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ } @@ -1332,7 +1334,7 @@ unknown_toUtf16(const ENCODING *enc, unsigned short **toP, const unsigned short *toLim) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); - while (*fromP != fromLim && *toP != toLim) { + while (*fromP < fromLim && *toP < toLim) { unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { c = (unsigned short) @@ -1507,7 +1509,7 @@ initScan(const ENCODING * const *encodingTable, { const ENCODING **encPtr; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; encPtr = enc->encPtr; if (ptr + 1 == end) { diff --git a/src/expat/lib/xmltok_impl.c b/src/expat/lib/xmltok_impl.c index 9c2895b877..6c5a3ba4c0 100644 --- a/src/expat/lib/xmltok_impl.c +++ b/src/expat/lib/xmltok_impl.c @@ -93,13 +93,13 @@ static int PTRCALL PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (ptr < end) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: @@ -147,7 +147,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: if (ptr + MINBPC(enc) == end) @@ -233,7 +233,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -242,7 +242,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: @@ -305,7 +305,7 @@ static int PTRCALL PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -348,7 +348,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -391,11 +391,11 @@ PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; @@ -432,7 +432,7 @@ static int PTRCALL PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -441,7 +441,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -464,7 +464,7 @@ static int PTRCALL PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (ptr < end) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { @@ -474,7 +474,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; @@ -506,7 +506,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -529,7 +529,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon = 0; #endif - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -716,7 +716,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, hadColon = 0; #endif /* we have a start-tag */ - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -740,7 +740,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_GT: @@ -785,7 +785,7 @@ static int PTRCALL PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -832,7 +832,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -895,7 +895,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -921,7 +921,7 @@ PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_CR: case BT_LF: case BT_S: @@ -941,7 +941,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - while (ptr != end) { + while (ptr < end) { int t = BYTE_TYPE(enc, ptr); switch (t) { INVALID_CASES(ptr, nextTokPtr) @@ -973,7 +973,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int tok; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -1141,7 +1141,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_GT: case BT_RPAR: case BT_COMMA: @@ -1204,10 +1204,10 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; start = ptr; - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1262,10 +1262,10 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; start = ptr; - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1326,7 +1326,7 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, end = ptr + n; } } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_LT: @@ -1373,7 +1373,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { + for (; ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1760,7 +1760,7 @@ PREFIX(updatePosition)(const ENCODING *enc, case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) + if (ptr < end && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (XML_Size)-1; break; From 66102231d3ccbe3b163f9ab57b615ab04fea9de7 Mon Sep 17 00:00:00 2001 From: Gustavo Grieco Date: Mon, 2 May 2016 00:35:34 +0200 Subject: [PATCH 2/8] Fix two integer overflows --- src/expat/lib/xmlparse.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/expat/lib/xmlparse.c b/src/expat/lib/xmlparse.c index 974db712f4..a09071f3f4 100644 --- a/src/expat/lib/xmlparse.c +++ b/src/expat/lib/xmlparse.c @@ -6265,8 +6265,13 @@ poolGrow(STRING_POOL *pool) } } if (pool->blocks && pool->start == pool->blocks->s) { + BLOCK *temp; int blockSize = (int)(pool->end - pool->start)*2; - BLOCK *temp = (BLOCK *) + + if (blockSize < 0) + return XML_FALSE; + + temp = (BLOCK *) pool->mem->realloc_fcn(pool->blocks, (offsetof(BLOCK, s) + blockSize * sizeof(XML_Char))); @@ -6281,6 +6286,10 @@ poolGrow(STRING_POOL *pool) else { BLOCK *tem; int blockSize = (int)(pool->end - pool->start); + + if (blockSize < 0) + return XML_FALSE; + if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; else From c5efe0c7db162504ee89094437ccdafce6e7bee0 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 1 May 2016 23:40:05 +0200 Subject: [PATCH 3/8] Prevent out-of-bounds access in text conversion * big2_toUtf8 * little2_toUtf8 * utf8_toUtf8 * utf8_toUtf16 --- src/expat/lib/xmltok.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/expat/lib/xmltok.c b/src/expat/lib/xmltok.c index a29f241cb6..630df7fdaa 100644 --- a/src/expat/lib/xmltok.c +++ b/src/expat/lib/xmltok.c @@ -335,7 +335,7 @@ utf8_toUtf8(const ENCODING *enc, if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) break; } - for (to = *toP, from = *fromP; from < fromLim; from++, to++) + for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) *to = *from; *fromP = from; *toP = to; @@ -351,10 +351,14 @@ utf8_toUtf16(const ENCODING *enc, while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: + if (from + 2 > fromLim) + break; *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: + if (from + 3 > fromLim) + break; *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); from += 3; @@ -364,6 +368,8 @@ utf8_toUtf16(const ENCODING *enc, unsigned long n; if (to + 1 == toLim) goto after; + if (from + 4 > fromLim) + goto after; n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); n -= 0x10000; @@ -583,7 +589,7 @@ E ## toUtf8(const ENCODING *enc, \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ break; \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ - if (toLim - *toP < 4) { \ + if ((toLim - *toP < 4) || (from + 4 > fromLim)) { \ *fromP = from; \ return; \ } \ From 63688b5e588e4c7f98f58b5256ea34a31d18f72b Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 1 May 2016 23:51:26 +0200 Subject: [PATCH 4/8] Make converters tell state on termination (v3) --- src/expat/lib/xmltok.c | 97 +++++++++++++++++++++++++++++++----------- src/expat/lib/xmltok.h | 10 ++++- 2 files changed, 80 insertions(+), 27 deletions(-) diff --git a/src/expat/lib/xmltok.c b/src/expat/lib/xmltok.c index 630df7fdaa..e69966fe41 100644 --- a/src/expat/lib/xmltok.c +++ b/src/expat/lib/xmltok.c @@ -322,15 +322,17 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval4 = 0xf0 }; -static void PTRCALL +static enum XML_Convert_Result PTRCALL utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; char *to; const char *from; if (fromLim - *fromP > toLim - *toP) { /* Avoid copying partial characters. */ + res = XML_CONVERT_OUTPUT_EXHAUSTED; for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) break; @@ -339,26 +341,36 @@ utf8_toUtf8(const ENCODING *enc, *to = *from; *fromP = from; *toP = to; + + if ((to == toLim) && (from < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return res; } -static void PTRCALL +static enum XML_Convert_Result PTRCALL utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; unsigned short *to = *toP; const char *from = *fromP; while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: - if (from + 2 > fromLim) + if (from + 2 > fromLim) { + res = XML_CONVERT_INPUT_INCOMPLETE; break; + } *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: - if (from + 3 > fromLim) + if (from + 3 > fromLim) { + res = XML_CONVERT_INPUT_INCOMPLETE; break; + } *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); from += 3; @@ -366,10 +378,14 @@ utf8_toUtf16(const ENCODING *enc, case BT_LEAD4: { unsigned long n; - if (to + 1 == toLim) + if (to + 2 > toLim) { + res = XML_CONVERT_OUTPUT_EXHAUSTED; goto after; - if (from + 4 > fromLim) + } + if (from + 4 > fromLim) { + res = XML_CONVERT_INPUT_INCOMPLETE; goto after; + } n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); n -= 0x10000; @@ -387,6 +403,7 @@ utf8_toUtf16(const ENCODING *enc, after: *fromP = from; *toP = to; + return res; } #ifdef XML_NS @@ -435,7 +452,7 @@ static const struct normal_encoding internal_utf8_encoding = { STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; -static void PTRCALL +static enum XML_Convert_Result PTRCALL latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) @@ -443,30 +460,35 @@ latin1_toUtf8(const ENCODING *enc, for (;;) { unsigned char c; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = (char)((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; } else { if (*toP == toLim) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = *(*fromP)++; } } } -static void PTRCALL +static enum XML_Convert_Result PTRCALL latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS @@ -493,13 +515,18 @@ static const struct normal_encoding latin1_encoding = { STANDARD_VTABLE(sb_) }; -static void PTRCALL +static enum XML_Convert_Result PTRCALL ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS @@ -546,7 +573,7 @@ unicode_byte_type(char hi, char lo) } #define DEFINE_UTF16_TO_UTF8(E) \ -static void PTRCALL \ +static enum XML_Convert_Result PTRCALL \ E ## toUtf8(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ char **toP, const char *toLim) \ @@ -563,7 +590,7 @@ E ## toUtf8(const ENCODING *enc, \ if (lo < 0x80) { \ if (*toP == toLim) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ *(*toP)++ = lo; \ break; \ @@ -573,7 +600,7 @@ E ## toUtf8(const ENCODING *enc, \ case 0x4: case 0x5: case 0x6: case 0x7: \ if (toLim - *toP < 2) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ @@ -581,7 +608,7 @@ E ## toUtf8(const ENCODING *enc, \ default: \ if (toLim - *toP < 3) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ @@ -589,9 +616,13 @@ E ## toUtf8(const ENCODING *enc, \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ break; \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ - if ((toLim - *toP < 4) || (from + 4 > fromLim)) { \ + if (toLim - *toP < 4) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + if (from + 4 > fromLim) { \ + *fromP = from; \ + return XML_CONVERT_INPUT_INCOMPLETE; \ } \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ @@ -607,21 +638,32 @@ E ## toUtf8(const ENCODING *enc, \ } \ } \ *fromP = from; \ + if (from < fromLim) \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + else \ + return XML_CONVERT_COMPLETED; \ } #define DEFINE_UTF16_TO_UTF16(E) \ -static void PTRCALL \ +static enum XML_Convert_Result PTRCALL \ E ## toUtf16(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ unsigned short **toP, const unsigned short *toLim) \ { \ + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ - && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ + && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ fromLim -= 2; \ + res = XML_CONVERT_INPUT_INCOMPLETE; \ + } \ for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ + if ((*toP == toLim) && (*fromP < fromLim)) \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + else \ + return res; \ } #define SET2(ptr, ch) \ @@ -1300,7 +1342,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p) return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) @@ -1311,21 +1353,21 @@ unknown_toUtf8(const ENCODING *enc, const char *utf8; int n; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; utf8 = buf; *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); } else { if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; (*fromP)++; } do { @@ -1334,7 +1376,7 @@ unknown_toUtf8(const ENCODING *enc, } } -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) @@ -1352,6 +1394,11 @@ unknown_toUtf16(const ENCODING *enc, (*fromP)++; *(*toP)++ = c; } + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } ENCODING * diff --git a/src/expat/lib/xmltok.h b/src/expat/lib/xmltok.h index ca867aa6b4..752007e8b9 100644 --- a/src/expat/lib/xmltok.h +++ b/src/expat/lib/xmltok.h @@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *, const char *, const char **); +enum XML_Convert_Result { + XML_CONVERT_COMPLETED = 0, + XML_CONVERT_INPUT_INCOMPLETE = 1, + XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */ +}; + struct encoding { SCANNER scanners[XML_N_STATES]; SCANNER literalScanners[XML_N_LITERAL_TYPES]; @@ -158,12 +164,12 @@ struct encoding { const char *ptr, const char *end, const char **badPtr); - void (PTRCALL *utf8Convert)(const ENCODING *enc, + enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim); - void (PTRCALL *utf16Convert)(const ENCODING *enc, + enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, From f88eff9cdbfdfcc034f0149ee4d5c803f1c8025a Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 1 May 2016 23:55:02 +0200 Subject: [PATCH 5/8] Do not grow pool to out-of-memory for incomplete input --- src/expat/lib/xmlparse.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/expat/lib/xmlparse.c b/src/expat/lib/xmlparse.c index a09071f3f4..840f255d23 100644 --- a/src/expat/lib/xmlparse.c +++ b/src/expat/lib/xmlparse.c @@ -6174,15 +6174,12 @@ static XML_Char * poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { - ICHAR* poolPtrPrev = NULL; if (!pool->ptr && !poolGrow(pool)) return NULL; for (;;) { - XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); - /* complete or zero progress? */ - if (ptr == end || pool->ptr == poolPtrPrev) + const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; - poolPtrPrev = pool->ptr; if (!poolGrow(pool)) return NULL; } From 5ea067da70850393c1d0a40031340404b1510804 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 1 May 2016 23:57:49 +0200 Subject: [PATCH 6/8] Complete XmlConvert return value handling --- src/expat/lib/xmlparse.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/expat/lib/xmlparse.c b/src/expat/lib/xmlparse.c index 840f255d23..09c9875844 100644 --- a/src/expat/lib/xmlparse.c +++ b/src/expat/lib/xmlparse.c @@ -2439,11 +2439,11 @@ doContent(XML_Parser parser, for (;;) { int bufSize; int convLen; - XmlConvert(enc, + const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); convLen = (int)(toPtr - (XML_Char *)tag->buf); - if (fromPtr == rawNameEnd) { + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; break; } @@ -2664,11 +2664,11 @@ doContent(XML_Parser parser, if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; charDataHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } @@ -3274,11 +3274,11 @@ doCdataSection(XML_Parser parser, if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = next; charDataHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } @@ -5356,6 +5356,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) { if (MUST_CONVERT(enc, s)) { + enum XML_Convert_Result convert_res; const char **eventPP; const char **eventEndPP; if (enc == encoding) { @@ -5368,11 +5369,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc, } do { ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); *eventPP = s; - } while (s < end); + } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); } else defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); From 779bfe99ee02f4e4af15833e8fdc79265e13dde7 Mon Sep 17 00:00:00 2001 From: Pascal Cuoq Date: Sun, 15 May 2016 19:11:55 +0200 Subject: [PATCH 7/8] Avoid undefined behavior when computing larger blockSize. The compiler might reason that (end - start)*2 is negative only if (end - start) is negative, see https://godbolt.org/g/wVEoTM --- src/expat/lib/xmlparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expat/lib/xmlparse.c b/src/expat/lib/xmlparse.c index 09c9875844..373aa4b38b 100644 --- a/src/expat/lib/xmlparse.c +++ b/src/expat/lib/xmlparse.c @@ -6264,7 +6264,7 @@ poolGrow(STRING_POOL *pool) } if (pool->blocks && pool->start == pool->blocks->s) { BLOCK *temp; - int blockSize = (int)(pool->end - pool->start)*2; + int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); if (blockSize < 0) return XML_FALSE; From 06381b5d81cb7db4dac9e95881591741f064c2c0 Mon Sep 17 00:00:00 2001 From: Pascal Cuoq Date: Sun, 15 May 2016 20:05:50 +0200 Subject: [PATCH 8/8] Do not compare an out-of-bounds pointer. See https://lwn.net/Articles/278137/ --- src/expat/lib/xmltok.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/expat/lib/xmltok.c b/src/expat/lib/xmltok.c index e69966fe41..a7f071ea4b 100644 --- a/src/expat/lib/xmltok.c +++ b/src/expat/lib/xmltok.c @@ -359,7 +359,7 @@ utf8_toUtf16(const ENCODING *enc, while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: - if (from + 2 > fromLim) { + if (fromLim - from < 2) { res = XML_CONVERT_INPUT_INCOMPLETE; break; } @@ -367,7 +367,7 @@ utf8_toUtf16(const ENCODING *enc, from += 2; break; case BT_LEAD3: - if (from + 3 > fromLim) { + if (fromLim - from < 3) { res = XML_CONVERT_INPUT_INCOMPLETE; break; } @@ -378,11 +378,11 @@ utf8_toUtf16(const ENCODING *enc, case BT_LEAD4: { unsigned long n; - if (to + 2 > toLim) { + if (toLim - to < 2) { res = XML_CONVERT_OUTPUT_EXHAUSTED; goto after; } - if (from + 4 > fromLim) { + if (fromLim - from < 4) { res = XML_CONVERT_INPUT_INCOMPLETE; goto after; } @@ -620,7 +620,7 @@ E ## toUtf8(const ENCODING *enc, \ *fromP = from; \ return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ - if (from + 4 > fromLim) { \ + if (fromLim - from < 4) { \ *fromP = from; \ return XML_CONVERT_INPUT_INCOMPLETE; \ } \