added "length" parameter to validUtf8String()
In several places, we create temporary copies of non-null-terminated strings just so we can pass them to functions like validUtf8String(). We can avoid this temporary allocation, and the heap fragmentation it causes, by adding a length parameter instead of expecting a null-terminated string.
This commit is contained in:
parent
92b757674e
commit
43c389b961
@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to,
|
|||||||
char *fs_charset_to_utf8(char *dst, const char *str)
|
char *fs_charset_to_utf8(char *dst, const char *str)
|
||||||
{
|
{
|
||||||
char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str);
|
char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str);
|
||||||
return (ret && !validUtf8String(ret)) ? NULL : ret;
|
return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *utf8_to_fs_charset(char *dst, const char *str)
|
char *utf8_to_fs_charset(char *dst, const char *str)
|
||||||
|
@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) {
|
|||||||
|
|
||||||
assert(str != NULL);
|
assert(str != NULL);
|
||||||
|
|
||||||
if (validUtf8String(str))
|
if (validUtf8String(str, strlen(str)))
|
||||||
return str;
|
return str;
|
||||||
|
|
||||||
DEBUG("not valid utf8 in tag: %s\n",str);
|
DEBUG("not valid utf8 in tag: %s\n",str);
|
||||||
|
18
src/utf8.c
18
src/utf8.c
@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8)
|
|||||||
return (char)(c + utf8[1]);
|
return (char)(c + utf8[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int validateUtf8Char(const char *inUtf8Char)
|
static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length)
|
||||||
{
|
{
|
||||||
const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
|
const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
|
||||||
|
|
||||||
|
assert(length > 0);
|
||||||
|
|
||||||
if (utf8Char[0] < 0x80)
|
if (utf8Char[0] < 0x80)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
|
|||||||
t = (t >> 1);
|
t = (t >> 1);
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
if (count > 5)
|
if (count > 5 || (size_t)count > length)
|
||||||
return 0;
|
return 0;
|
||||||
for (i = 1; i <= count; i++) {
|
for (i = 1; i <= count; i++) {
|
||||||
if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
|
if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
|
||||||
@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int validUtf8String(const char *string)
|
int validUtf8String(const char *string, size_t length)
|
||||||
{
|
{
|
||||||
unsigned int ret;
|
unsigned int ret;
|
||||||
|
|
||||||
while (*string) {
|
while (length > 0) {
|
||||||
ret = validateUtf8Char(string);
|
ret = validateUtf8Char(string, length);
|
||||||
|
assert((size_t)ret <= length);
|
||||||
if (0 == ret)
|
if (0 == ret)
|
||||||
return 0;
|
return 0;
|
||||||
string += ret;
|
string += ret;
|
||||||
|
length -= ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8)
|
|||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
|
|
||||||
while (*utf8) {
|
while (*utf8) {
|
||||||
count = validateUtf8Char(utf8);
|
count = validateUtf8Char(utf8, INT_MAX);
|
||||||
if (!count) {
|
if (!count) {
|
||||||
free(ret);
|
free(ret);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8)
|
|||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
|
|
||||||
while (*utf8) {
|
while (*utf8) {
|
||||||
count = validateUtf8Char(utf8);
|
count = validateUtf8Char(utf8, INT_MAX);
|
||||||
if (count) {
|
if (count) {
|
||||||
*(cp++) = utf8_to_latin1_char(utf8);
|
*(cp++) = utf8_to_latin1_char(utf8);
|
||||||
utf8 += count;
|
utf8 += count;
|
||||||
|
@ -19,11 +19,13 @@
|
|||||||
#ifndef UTF_8_H
|
#ifndef UTF_8_H
|
||||||
#define UTF_8_H
|
#define UTF_8_H
|
||||||
|
|
||||||
|
#include <os_compat.h>
|
||||||
|
|
||||||
char *latin1StrToUtf8Dup(const char *latin1);
|
char *latin1StrToUtf8Dup(const char *latin1);
|
||||||
|
|
||||||
char *utf8StrToLatin1Dup(const char *utf8);
|
char *utf8StrToLatin1Dup(const char *utf8);
|
||||||
|
|
||||||
int validUtf8String(const char *string);
|
int validUtf8String(const char *string, size_t length);
|
||||||
|
|
||||||
char *utf8_to_latin1(char *dest, const char *utf8);
|
char *utf8_to_latin1(char *dest, const char *utf8);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user