Add a "length" parameter to validUtf8String()

In several places, we create temporary copies of non-null-terminated
strings just to pass them to functions like validUtf8String().  We can
avoid these temporary allocations, and the heap fragmentation they
cause, by adding a length parameter instead of expecting a
null-terminated string.
This commit is contained in:
Max Kellermann 2008-08-29 09:38:58 +02:00
parent 92b757674e
commit 43c389b961
4 changed files with 16 additions and 10 deletions

View File

@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to,
char *fs_charset_to_utf8(char *dst, const char *str) char *fs_charset_to_utf8(char *dst, const char *str)
{ {
char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str); char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str);
return (ret && !validUtf8String(ret)) ? NULL : ret; return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret;
} }
char *utf8_to_fs_charset(char *dst, const char *str) char *utf8_to_fs_charset(char *dst, const char *str)

View File

@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) {
assert(str != NULL); assert(str != NULL);
if (validUtf8String(str)) if (validUtf8String(str, strlen(str)))
return str; return str;
DEBUG("not valid utf8 in tag: %s\n",str); DEBUG("not valid utf8 in tag: %s\n",str);

View File

@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8)
return (char)(c + utf8[1]); return (char)(c + utf8[1]);
} }
static unsigned int validateUtf8Char(const char *inUtf8Char) static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length)
{ {
const unsigned char *utf8Char = (const unsigned char *)inUtf8Char; const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
assert(length > 0);
if (utf8Char[0] < 0x80) if (utf8Char[0] < 0x80)
return 1; return 1;
@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
t = (t >> 1); t = (t >> 1);
count++; count++;
} }
if (count > 5) if (count > 5 || (size_t)count > length)
return 0; return 0;
for (i = 1; i <= count; i++) { for (i = 1; i <= count; i++) {
if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF) if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
return 0; return 0;
} }
int validUtf8String(const char *string) int validUtf8String(const char *string, size_t length)
{ {
unsigned int ret; unsigned int ret;
while (*string) { while (length > 0) {
ret = validateUtf8Char(string); ret = validateUtf8Char(string, length);
assert((size_t)ret <= length);
if (0 == ret) if (0 == ret)
return 0; return 0;
string += ret; string += ret;
length -= ret;
} }
return 1; return 1;
@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8)
size_t len = 0; size_t len = 0;
while (*utf8) { while (*utf8) {
count = validateUtf8Char(utf8); count = validateUtf8Char(utf8, INT_MAX);
if (!count) { if (!count) {
free(ret); free(ret);
return NULL; return NULL;
@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8)
size_t len = 0; size_t len = 0;
while (*utf8) { while (*utf8) {
count = validateUtf8Char(utf8); count = validateUtf8Char(utf8, INT_MAX);
if (count) { if (count) {
*(cp++) = utf8_to_latin1_char(utf8); *(cp++) = utf8_to_latin1_char(utf8);
utf8 += count; utf8 += count;

View File

@ -19,11 +19,13 @@
#ifndef UTF_8_H #ifndef UTF_8_H
#define UTF_8_H #define UTF_8_H
#include <os_compat.h>
char *latin1StrToUtf8Dup(const char *latin1); char *latin1StrToUtf8Dup(const char *latin1);
char *utf8StrToLatin1Dup(const char *utf8); char *utf8StrToLatin1Dup(const char *utf8);
int validUtf8String(const char *string); int validUtf8String(const char *string, size_t length);
char *utf8_to_latin1(char *dest, const char *utf8); char *utf8_to_latin1(char *dest, const char *utf8);