2004-04-13 21:59:03 +02:00
|
|
|
/* the Music Player Daemon (MPD)
|
2006-07-14 21:37:45 +02:00
|
|
|
* (c)2003-2006 by Warren Dukes (warren.dukes@gmail.com)
|
2004-04-13 21:59:03 +02:00
|
|
|
* This project's homepage is: http://www.musicpd.org
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*/
|
|
|
|
|
2004-04-13 04:20:46 +02:00
|
|
|
#include "utf8.h"
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
2006-07-20 18:02:40 +02:00
|
|
|
/*
 * Encode a single ISO-8859-1 (Latin-1) character as UTF-8.
 *
 * Returns a pointer to a NUL-terminated, function-local static buffer
 * holding one or two bytes.  The buffer is overwritten by every call, so
 * the result must be consumed before calling this function again.
 * For c == 0 the result is the empty string.
 */
static char *latin1ToUtf8(char c)
{
	static unsigned char encoded[3];
	const unsigned char byte = (unsigned char)c;

	/* start from an all-zero buffer so the result is always terminated */
	encoded[0] = 0;
	encoded[1] = 0;
	encoded[2] = 0;

	if (byte >= 192) {
		/* 0xC0..0xFF: lead byte 0xC3, trail byte shifted down by 0x40 */
		encoded[0] = 195;
		encoded[1] = byte - 64;
	} else if (byte >= 128) {
		/* 0x80..0xBF: lead byte 0xC2, trail byte is the value itself */
		encoded[0] = 194;
		encoded[1] = byte;
	} else {
		/* plain ASCII is copied through unchanged */
		encoded[0] = byte;
	}

	return (char *)encoded;
}
|
|
|
|
|
2006-07-20 18:02:40 +02:00
|
|
|
/*
 * Convert a NUL-terminated ISO-8859-1 (Latin-1) string to UTF-8.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if latin1 is NULL or memory could not be allocated.
 */
char *latin1StrToUtf8Dup(char *latin1)
{
	size_t inLen;
	size_t len = 0;
	char *ret;
	char *shrunk;

	if (!latin1)
		return NULL;

	/* utf8 needs at most two bytes per latin1 char, plus the terminator */
	inLen = strlen(latin1);
	if (inLen > ((size_t)-1 - 1) / 2)
		return NULL;	/* size computation would overflow */

	ret = malloc(inLen * 2 + 1);
	if (!ret)
		return NULL;

	for (; *latin1; latin1++) {
		const char *utf8 = latin1ToUtf8(*latin1);

		while (*utf8)
			ret[len++] = *(utf8++);
	}
	ret[len] = '\0';

	/* trim to the size actually used; if that fails the original
	 * (larger) buffer is still valid, so hand it back instead of
	 * leaking it */
	shrunk = realloc(ret, len + 1);
	return shrunk ? shrunk : ret;
}
|
|
|
|
|
2006-07-20 18:02:40 +02:00
|
|
|
/*
 * Decode the UTF-8 sequence starting at inUtf8 into one Latin-1 character.
 *
 * ASCII bytes are returned unchanged.  Two-byte sequences led by 0xC2 or
 * 0xC3 are mapped from their second byte.  Any other lead byte yields '?'.
 * The second byte is not checked here.
 */
static char utf8ToLatin1(char *inUtf8)
{
	const unsigned char *bytes = (const unsigned char *)inUtf8;
	unsigned char offset;

	if (bytes[0] < 128)
		return bytes[0];

	switch (bytes[0]) {
	case 194:
		/* 0xC2 xx -> xx */
		offset = 0;
		break;
	case 195:
		/* 0xC3 xx -> xx + 0x40 */
		offset = 64;
		break;
	default:
		/* lead byte outside the two handled above */
		return '?';
	}

	return (char)(offset + bytes[1]);
}
|
|
|
|
|
2006-07-20 18:02:40 +02:00
|
|
|
/*
 * Validate the single UTF-8 encoded character starting at inUtf8Char.
 *
 * Returns the length in bytes (1..4) of the character if it is well-formed
 * according to RFC 3629, or 0 if it is not.  Rejected are: stray
 * continuation bytes, truncated sequences, overlong encodings (lead bytes
 * 0xC0/0xC1, E0 80..9F, F0 80..8F), UTF-16 surrogates (ED A0..BF), values
 * above U+10FFFF and the obsolete 5- and 6-byte forms.
 *
 * The input must be NUL-terminated; a terminator inside a multi-byte
 * sequence fails the continuation-byte check, so nothing past the
 * terminator is read.
 */
static int validateUtf8Char(char *inUtf8Char)
{
	const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
	const unsigned char lead = utf8Char[0];
	/* allowed range of the first continuation byte; narrowed below for
	 * the lead bytes that would otherwise permit an invalid value */
	unsigned char lo = 0x80;
	unsigned char hi = 0xBF;
	int trailing;
	int i;

	if (lead < 0x80)
		return 1;

	if (lead >= 0xC2 && lead <= 0xDF) {
		trailing = 1;
	} else if (lead >= 0xE0 && lead <= 0xEF) {
		trailing = 2;
		if (lead == 0xE0)
			lo = 0xA0;	/* anything lower is overlong */
		else if (lead == 0xED)
			hi = 0x9F;	/* anything higher is a surrogate */
	} else if (lead >= 0xF0 && lead <= 0xF4) {
		trailing = 3;
		if (lead == 0xF0)
			lo = 0x90;	/* anything lower is overlong */
		else if (lead == 0xF4)
			hi = 0x8F;	/* anything higher exceeds U+10FFFF */
	} else {
		/* continuation byte, 0xC0/0xC1, or 0xF5..0xFF */
		return 0;
	}

	if (utf8Char[1] < lo || utf8Char[1] > hi)
		return 0;

	for (i = 2; i <= trailing; i++) {
		if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
			return 0;
	}

	return trailing + 1;
}
|
|
|
|
|
2006-07-20 18:02:40 +02:00
|
|
|
/*
 * Check whether a NUL-terminated string consists solely of characters
 * accepted by validateUtf8Char().
 *
 * Returns 1 if every character validates (an empty string counts as
 * valid), 0 as soon as one does not.
 */
int validUtf8String(char *string)
{
	char *cursor;
	int charLen;

	for (cursor = string; *cursor; cursor += charLen) {
		/* length of the character at the cursor, 0 if malformed */
		charLen = validateUtf8Char(cursor);
		if (charLen == 0)
			return 0;
	}

	return 1;
}
|
2004-04-13 06:59:57 +02:00
|
|
|
|
2006-07-20 18:02:40 +02:00
|
|
|
/*
 * Convert a NUL-terminated UTF-8 string to ISO-8859-1 (Latin-1).
 *
 * Each input character is checked with validateUtf8Char() and converted
 * with utf8ToLatin1(), which substitutes '?' for lead bytes it does not
 * handle.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if utf8 is NULL, fails validation, or memory could not
 * be allocated.
 */
char *utf8StrToLatin1Dup(char *utf8)
{
	size_t len = 0;
	char *ret;
	char *shrunk;

	if (!utf8)
		return NULL;

	/* every utf8 char yields exactly one latin1 byte, so the output is
	 * never longer than the input */
	ret = malloc(strlen(utf8) + 1);
	if (!ret)
		return NULL;

	while (*utf8) {
		int count = validateUtf8Char(utf8);

		if (!count) {
			free(ret);
			return NULL;
		}
		ret[len++] = utf8ToLatin1(utf8);
		utf8 += count;
	}
	ret[len] = '\0';

	/* trim to the size actually used; if that fails the original
	 * (larger) buffer is still valid, so hand it back instead of
	 * leaking it */
	shrunk = realloc(ret, len + 1);
	return shrunk ? shrunk : ret;
}
|