added "length" parameter to validUtf8String()
In several places, we create temporary copies of non-null-terminated strings just to pass them to functions like validUtf8String(). We can avoid these temporary allocations, and the heap fragmentation they cause, by adding a length parameter instead of expecting a null-terminated string.
This commit is contained in:
parent
92b757674e
commit
43c389b961
@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to,
|
||||
char *fs_charset_to_utf8(char *dst, const char *str)
|
||||
{
|
||||
char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str);
|
||||
return (ret && !validUtf8String(ret)) ? NULL : ret;
|
||||
return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret;
|
||||
}
|
||||
|
||||
char *utf8_to_fs_charset(char *dst, const char *str)
|
||||
|
@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) {
|
||||
|
||||
assert(str != NULL);
|
||||
|
||||
if (validUtf8String(str))
|
||||
if (validUtf8String(str, strlen(str)))
|
||||
return str;
|
||||
|
||||
DEBUG("not valid utf8 in tag: %s\n",str);
|
||||
|
18
src/utf8.c
18
src/utf8.c
@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8)
|
||||
return (char)(c + utf8[1]);
|
||||
}
|
||||
|
||||
static unsigned int validateUtf8Char(const char *inUtf8Char)
|
||||
static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length)
|
||||
{
|
||||
const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
if (utf8Char[0] < 0x80)
|
||||
return 1;
|
||||
|
||||
@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
|
||||
t = (t >> 1);
|
||||
count++;
|
||||
}
|
||||
if (count > 5)
|
||||
if (count > 5 || (size_t)count > length)
|
||||
return 0;
|
||||
for (i = 1; i <= count; i++) {
|
||||
if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
|
||||
@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int validUtf8String(const char *string)
|
||||
int validUtf8String(const char *string, size_t length)
|
||||
{
|
||||
unsigned int ret;
|
||||
|
||||
while (*string) {
|
||||
ret = validateUtf8Char(string);
|
||||
while (length > 0) {
|
||||
ret = validateUtf8Char(string, length);
|
||||
assert((size_t)ret <= length);
|
||||
if (0 == ret)
|
||||
return 0;
|
||||
string += ret;
|
||||
length -= ret;
|
||||
}
|
||||
|
||||
return 1;
|
||||
@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8)
|
||||
size_t len = 0;
|
||||
|
||||
while (*utf8) {
|
||||
count = validateUtf8Char(utf8);
|
||||
count = validateUtf8Char(utf8, INT_MAX);
|
||||
if (!count) {
|
||||
free(ret);
|
||||
return NULL;
|
||||
@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8)
|
||||
size_t len = 0;
|
||||
|
||||
while (*utf8) {
|
||||
count = validateUtf8Char(utf8);
|
||||
count = validateUtf8Char(utf8, INT_MAX);
|
||||
if (count) {
|
||||
*(cp++) = utf8_to_latin1_char(utf8);
|
||||
utf8 += count;
|
||||
|
@ -19,11 +19,13 @@
|
||||
#ifndef UTF_8_H
|
||||
#define UTF_8_H
|
||||
|
||||
#include <os_compat.h>
|
||||
|
||||
char *latin1StrToUtf8Dup(const char *latin1);
|
||||
|
||||
char *utf8StrToLatin1Dup(const char *utf8);
|
||||
|
||||
int validUtf8String(const char *string);
|
||||
int validUtf8String(const char *string, size_t length);
|
||||
|
||||
char *utf8_to_latin1(char *dest, const char *utf8);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user