wind: Deny invalid UTF-8 encodings
Codepoints above U+10FFFF and overlong encodings are now rejected as invalid. Unpaired surrogates (U+D800–U+DFFF) are still accepted, as these are known to be generated on occasion — by Windows, for example.

Signed-off-by: Joseph Sutton <josephsutton@catalyst.net.nz>
This commit is contained in:

committed by
Nico Williams

parent
bf25b38c0a
commit
a3878d3e9d
@@ -50,21 +50,32 @@ static const char *failing_testcases[] = {
|
||||
"\xF7",
|
||||
"\xC0\x01",
|
||||
"\xC0\x7F",
|
||||
"\xC0\x80",
|
||||
"\xC0\x81",
|
||||
"\xC0\xFF",
|
||||
"\xC0\x80\x80",
|
||||
"\xC1\x80",
|
||||
"\xE0\x01",
|
||||
"\xE0\x7F",
|
||||
"\xE0\x80",
|
||||
"\xE0\xFF",
|
||||
"\xE0\x80\x20",
|
||||
"\xE0\x80\xFF",
|
||||
"\xE0\x80\x80",
|
||||
"\xE0\x80\x81",
|
||||
"\xE0\x80\x80\x80",
|
||||
"\xE0\x81\x80",
|
||||
"\xF0\x01",
|
||||
"\xF0\x80",
|
||||
"\xF0\x80\x01",
|
||||
"\xF0\x80\x80",
|
||||
"\xF0\x80\x80\x01",
|
||||
"\xF0\x80\x80\x80",
|
||||
"\xF0\x80\x80\x81",
|
||||
"\xF0\x80\x80\xFF",
|
||||
"\xF0\x80\x81\x80",
|
||||
"\xF0\x81\x80\x80",
|
||||
"\xF7\xBF\xBF\xBF",
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -82,21 +93,10 @@ static const struct testcase testcases[] = {
|
||||
{"\x01", 1, {1}, 0},
|
||||
{"\x7F", 1, {0x7F}, 0},
|
||||
{"\x01\x7F", 2, {0x01, 0x7F}, 0},
|
||||
{"\xC0\x80", 1, {0}, 0},
|
||||
{"\xC0\x81", 1, {1}, 0},
|
||||
{"\xC1\x80", 1, {0x40}, 0},
|
||||
{"\xDF\xBF", 1, {0x7FF}, 0},
|
||||
{"\xE0\x80\x80", 1, {0}, 0},
|
||||
{"\xE0\x80\x81", 1, {1}, 0},
|
||||
{"\xE0\x81\x80", 1, {0x40}, 0},
|
||||
{"\xE1\x80\x80", 1, {0x1000}, 0},
|
||||
{"\xEF\xBF\xBF", 1, {0xFFFF}, 0},
|
||||
{"\xF0\x80\x80\x80", 1, {0}, 0},
|
||||
{"\xF0\x80\x80\x81", 1, {1}, 0},
|
||||
{"\xF0\x80\x81\x80", 1, {0x40}, 0},
|
||||
{"\xF0\x81\x80\x80", 1, {0x1000}, 0},
|
||||
{"\xF1\x80\x80\x80", 1, {0x40000}, 0},
|
||||
{"\xF7\xBF\xBF\xBF", 1, {0X1FFFFF}, 1},
|
||||
};
|
||||
|
||||
int
|
||||
|
@@ -39,13 +39,17 @@ utf8toutf32(const unsigned char **pp, uint32_t *out)
|
||||
{
|
||||
const unsigned char *p = *pp;
|
||||
uint32_t c = *p;
|
||||
uint32_t out_val;
|
||||
|
||||
if (c & 0x80) {
|
||||
if ((c & 0xE0) == 0xC0) {
|
||||
const uint32_t c2 = *++p;
|
||||
if ((c2 & 0xC0) == 0x80) {
|
||||
*out = ((c & 0x1F) << 6)
|
||||
out_val = ((c & 0x1F) << 6)
|
||||
| (c2 & 0x3F);
|
||||
if (out_val < 0x80) {
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
} else {
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
@@ -54,9 +58,12 @@ utf8toutf32(const unsigned char **pp, uint32_t *out)
|
||||
if ((c2 & 0xC0) == 0x80) {
|
||||
const uint32_t c3 = *++p;
|
||||
if ((c3 & 0xC0) == 0x80) {
|
||||
*out = ((c & 0x0F) << 12)
|
||||
out_val = ((c & 0x0F) << 12)
|
||||
| ((c2 & 0x3F) << 6)
|
||||
| (c3 & 0x3F);
|
||||
if (out_val < 0x800) {
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
} else {
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
@@ -70,10 +77,13 @@ utf8toutf32(const unsigned char **pp, uint32_t *out)
|
||||
if ((c3 & 0xC0) == 0x80) {
|
||||
const uint32_t c4 = *++p;
|
||||
if ((c4 & 0xC0) == 0x80) {
|
||||
*out = ((c & 0x07) << 18)
|
||||
out_val = ((c & 0x07) << 18)
|
||||
| ((c2 & 0x3F) << 12)
|
||||
| ((c3 & 0x3F) << 6)
|
||||
| (c4 & 0x3F);
|
||||
if (out_val < 0x10000) {
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
} else {
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
@@ -87,9 +97,16 @@ utf8toutf32(const unsigned char **pp, uint32_t *out)
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
} else {
|
||||
*out = c;
|
||||
out_val = c;
|
||||
}
|
||||
|
||||
/* Allow unpaired surrogates (in the range 0xd800–0xdfff). */
|
||||
|
||||
if (out_val > 0x10ffff) {
|
||||
return WIND_ERR_INVALID_UTF8;
|
||||
}
|
||||
|
||||
*out = out_val;
|
||||
*pp = p;
|
||||
|
||||
return 0;
|
||||
|
Reference in New Issue
Block a user