added "length" parameter to validUtf8String()

In several places, we create temporary copies of non-null-terminated
strings just to pass them to functions like validUtf8String().  We can
avoid this temporary allocation, and the heap fragmentation it causes,
by adding a length parameter instead of expecting a null-terminated string.
This commit is contained in:
Max Kellermann 2008-08-29 09:38:58 +02:00
parent 92b757674e
commit 43c389b961
4 changed files with 16 additions and 10 deletions

View File

@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to,
char *fs_charset_to_utf8(char *dst, const char *str)
{
char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str);
return (ret && !validUtf8String(ret)) ? NULL : ret;
return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret;
}
char *utf8_to_fs_charset(char *dst, const char *str)

View File

@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) {
assert(str != NULL);
if (validUtf8String(str))
if (validUtf8String(str, strlen(str)))
return str;
DEBUG("not valid utf8 in tag: %s\n",str);

View File

@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8)
return (char)(c + utf8[1]);
}
static unsigned int validateUtf8Char(const char *inUtf8Char)
static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length)
{
const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
assert(length > 0);
if (utf8Char[0] < 0x80)
return 1;
@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
t = (t >> 1);
count++;
}
if (count > 5)
if (count > 5 || (size_t)count > length)
return 0;
for (i = 1; i <= count; i++) {
if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
return 0;
}
int validUtf8String(const char *string)
int validUtf8String(const char *string, size_t length)
{
unsigned int ret;
while (*string) {
ret = validateUtf8Char(string);
while (length > 0) {
ret = validateUtf8Char(string, length);
assert((size_t)ret <= length);
if (0 == ret)
return 0;
string += ret;
length -= ret;
}
return 1;
@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8)
size_t len = 0;
while (*utf8) {
count = validateUtf8Char(utf8);
count = validateUtf8Char(utf8, INT_MAX);
if (!count) {
free(ret);
return NULL;
@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8)
size_t len = 0;
while (*utf8) {
count = validateUtf8Char(utf8);
count = validateUtf8Char(utf8, INT_MAX);
if (count) {
*(cp++) = utf8_to_latin1_char(utf8);
utf8 += count;

View File

@ -19,11 +19,13 @@
#ifndef UTF_8_H
#define UTF_8_H
#include <os_compat.h>
char *latin1StrToUtf8Dup(const char *latin1);
char *utf8StrToLatin1Dup(const char *utf8);
int validUtf8String(const char *string);
int validUtf8String(const char *string, size_t length);
char *utf8_to_latin1(char *dest, const char *utf8);