tag: don't accept invalid UTF-8 sequences

Overwrite invalid UTF-8 sequences with question marks.
This commit is contained in:
Max Kellermann 2009-02-27 09:02:32 +01:00
parent c1ab2d06aa
commit 497c0b1c18

View File

@ -407,24 +407,46 @@ bool tag_equal(const struct tag *tag1, const struct tag *tag2)
return true; return true;
} }
/**
 * Replace invalid sequences with the question mark.
 *
 * Only the first "length" bytes of "src" are copied and patched.
 * Every byte at which g_utf8_validate() stops is overwritten with
 * '?' in the copy, one byte per iteration, until the rest of the
 * input validates.
 *
 * @param src the input string; it is not modified
 * @param length the number of bytes of src to consider
 * @param end the position inside src where validation failed first
 * (as stored by g_utf8_validate()); must lie within
 * [src, src + length)
 * @return a newly allocated, null-terminated copy with the offending
 * bytes replaced; it is allocated by GLib, so release it with
 * g_free()
 */
static char *
patch_utf8(const char *src, size_t length, const gchar *end)
{
	/* duplicate the string, and replace invalid bytes in that
	   buffer.  g_strndup() always yields a buffer of length + 1
	   bytes, so every index below "length" is writable even if
	   src contains a null byte before "length" (a plain
	   g_strdup() would stop there, and the writes below would
	   clobber the terminator or land outside the allocation).
	   Note that g_strndup() pads with nulls after such a byte,
	   i.e. the result is cut off behind it. */
	char *dest = g_strndup(src, length);

	do {
		/* "end" points into src; the same offset is patched
		   in the copy */
		dest[end - src] = '?';

		/* continue validating the source right behind the bad
		   byte; on failure, "end" is moved to the next one */
	} while (!g_utf8_validate(end + 1, (src + length) - (end + 1), &end));

	return dest;
}
static char * static char *
fix_utf8(const char *str, size_t length) fix_utf8(const char *str, size_t length)
{ {
const gchar *end;
char *temp; char *temp;
gsize written; gsize written;
assert(str != NULL); assert(str != NULL);
if (g_utf8_validate(str, length, NULL)) /* check if the string is already valid UTF-8 */
if (g_utf8_validate(str, length, &end))
return NULL; return NULL;
DEBUG("not valid utf8 in tag: %s\n",str); /* no, it's not - try to import it from ISO-Latin-1 */
temp = g_convert(str, length, "utf-8", "iso-8859-1", temp = g_convert(str, length, "utf-8", "iso-8859-1",
NULL, &written, NULL); NULL, &written, NULL);
if (temp == NULL) if (temp != NULL)
return NULL; /* success! */
return temp;
return temp; /* no, still broken - there's no medication, just patch
invalid sequences */
return patch_utf8(str, length, end);
} }
void tag_begin_add(struct tag *tag) void tag_begin_add(struct tag *tag)