diff --git a/lib/wind/test-utf8.c b/lib/wind/test-utf8.c index 0b95032ff..b70cd74f6 100644 --- a/lib/wind/test-utf8.c +++ b/lib/wind/test-utf8.c @@ -50,21 +50,32 @@ static const char *failing_testcases[] = { "\xF7", "\xC0\x01", "\xC0\x7F", + "\xC0\x80", + "\xC0\x81", "\xC0\xFF", "\xC0\x80\x80", + "\xC1\x80", "\xE0\x01", "\xE0\x7F", "\xE0\x80", "\xE0\xFF", "\xE0\x80\x20", "\xE0\x80\xFF", + "\xE0\x80\x80", + "\xE0\x80\x81", "\xE0\x80\x80\x80", + "\xE0\x81\x80", "\xF0\x01", "\xF0\x80", "\xF0\x80\x01", "\xF0\x80\x80", "\xF0\x80\x80\x01", + "\xF0\x80\x80\x80", + "\xF0\x80\x80\x81", "\xF0\x80\x80\xFF", + "\xF0\x80\x81\x80", + "\xF0\x81\x80\x80", + "\xF7\xBF\xBF\xBF", NULL }; @@ -82,21 +93,10 @@ static const struct testcase testcases[] = { {"\x01", 1, {1}, 0}, {"\x7F", 1, {0x7F}, 0}, {"\x01\x7F", 2, {0x01, 0x7F}, 0}, - {"\xC0\x80", 1, {0}, 0}, - {"\xC0\x81", 1, {1}, 0}, - {"\xC1\x80", 1, {0x40}, 0}, {"\xDF\xBF", 1, {0x7FF}, 0}, - {"\xE0\x80\x80", 1, {0}, 0}, - {"\xE0\x80\x81", 1, {1}, 0}, - {"\xE0\x81\x80", 1, {0x40}, 0}, {"\xE1\x80\x80", 1, {0x1000}, 0}, {"\xEF\xBF\xBF", 1, {0xFFFF}, 0}, - {"\xF0\x80\x80\x80", 1, {0}, 0}, - {"\xF0\x80\x80\x81", 1, {1}, 0}, - {"\xF0\x80\x81\x80", 1, {0x40}, 0}, - {"\xF0\x81\x80\x80", 1, {0x1000}, 0}, {"\xF1\x80\x80\x80", 1, {0x40000}, 0}, - {"\xF7\xBF\xBF\xBF", 1, {0X1FFFFF}, 1}, }; int diff --git a/lib/wind/utf8.c b/lib/wind/utf8.c index 4dcc15f22..7e7144828 100644 --- a/lib/wind/utf8.c +++ b/lib/wind/utf8.c @@ -39,13 +39,17 @@ utf8toutf32(const unsigned char **pp, uint32_t *out) { const unsigned char *p = *pp; uint32_t c = *p; + uint32_t out_val; if (c & 0x80) { if ((c & 0xE0) == 0xC0) { const uint32_t c2 = *++p; if ((c2 & 0xC0) == 0x80) { - *out = ((c & 0x1F) << 6) + out_val = ((c & 0x1F) << 6) | (c2 & 0x3F); + if (out_val < 0x80) { + return WIND_ERR_INVALID_UTF8; + } } else { return WIND_ERR_INVALID_UTF8; } @@ -54,9 +58,12 @@ utf8toutf32(const unsigned char **pp, uint32_t *out) if ((c2 & 0xC0) == 0x80) { const uint32_t c3 = *++p; if ((c3 & 0xC0) == 0x80) { - *out = ((c & 0x0F) << 12) + out_val = ((c & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F); + if (out_val < 0x800) { + return WIND_ERR_INVALID_UTF8; + } } else { return WIND_ERR_INVALID_UTF8; } @@ -70,10 +77,13 @@ utf8toutf32(const unsigned char **pp, uint32_t *out) if ((c3 & 0xC0) == 0x80) { const uint32_t c4 = *++p; if ((c4 & 0xC0) == 0x80) { - *out = ((c & 0x07) << 18) + out_val = ((c & 0x07) << 18) | ((c2 & 0x3F) << 12) | ((c3 & 0x3F) << 6) | (c4 & 0x3F); + if (out_val < 0x10000) { + return WIND_ERR_INVALID_UTF8; + } } else { return WIND_ERR_INVALID_UTF8; } @@ -87,9 +97,16 @@ utf8toutf32(const unsigned char **pp, uint32_t *out) return WIND_ERR_INVALID_UTF8; } } else { - *out = c; + out_val = c; } + /* Allow unpaired surrogates (in the range 0xd800–0xdfff). */ + + if (out_val > 0x10ffff) { + return WIND_ERR_INVALID_UTF8; + } + + *out = out_val; *pp = p; return 0;