lots of fsCharset, utf8/ascii converting clean-up and robustness stuff

Also, if fsCharsetToUtf8 can't convert a name to valid UTF-8, then don't add
it to the db. This way clients don't have to worry about weirdness, and it
will force people to convert it.

git-svn-id: https://svn.musicpd.org/mpd/trunk@711 09075e82-0dd4-0310-85a5-a0d7c8717e4f
This commit is contained in:
Warren Dukes 2004-04-13 04:59:57 +00:00
parent 0927c61533
commit 1004890e25
7 changed files with 152 additions and 40 deletions

View File

@ -17,6 +17,8 @@
*/ */
#include "charConv.h" #include "charConv.h"
#include "mpd_types.h"
#include "utf8.h"
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>
@ -25,14 +27,21 @@
#ifdef HAVE_ICONV #ifdef HAVE_ICONV
#include <iconv.h> #include <iconv.h>
iconv_t char_conv_iconv; iconv_t char_conv_iconv;
#endif
char * char_conv_to = NULL; char * char_conv_to = NULL;
char * char_conv_from = NULL; char * char_conv_from = NULL;
#endif mpd_sint8 char_conv_same = 0;
mpd_sint8 char_conv_use_iconv = 0;
/* Selects the built-in ISO-8859-1 ("ascii") <-> UTF-8 converter:
    1 is to use asciiToUtf8 (ISO-8859-1 to UTF-8)
    0 is not to use ascii/utf8 converter
   -1 is to use utf8ToAscii (UTF-8 to ISO-8859-1) */
mpd_sint8 char_conv_asciiToUtf8 = 0;
#define BUFFER_SIZE 1024 #define BUFFER_SIZE 1024
int setCharSetConversion(char * to, char * from) { int setCharSetConversion(char * to, char * from) {
#ifdef HAVE_ICONV
if(char_conv_to && strcmp(to,char_conv_to)==0 && if(char_conv_to && strcmp(to,char_conv_to)==0 &&
char_conv_from && strcmp(from,char_conv_from)==0) char_conv_from && strcmp(from,char_conv_from)==0)
{ {
@ -41,60 +50,100 @@ int setCharSetConversion(char * to, char * from) {
closeCharSetConversion(); closeCharSetConversion();
if(0==strcmp(to,from)) {
char_conv_same = 1;
char_conv_to = strdup(to);
char_conv_from = strdup(from);
return 0;
}
if(strcmp(to,"UTF-8")==0 && strcmp(from,"ISO-8859-1")==0) {
char_conv_asciiToUtf8 = 1;
}
else if(strcmp(to,"ISO-8859-1")==0 && strcmp(from,"UTF-8")==0) {
char_conv_asciiToUtf8 = -1;
}
if(char_conv_asciiToUtf8!=0) {
char_conv_to = strdup(to);
char_conv_from = strdup(from);
return 0;
}
#ifdef HAVE_ICONV
if((char_conv_iconv = iconv_open(to,from))==(iconv_t)(-1)) return -1; if((char_conv_iconv = iconv_open(to,from))==(iconv_t)(-1)) return -1;
char_conv_to = strdup(to); char_conv_to = strdup(to);
char_conv_from = strdup(from); char_conv_from = strdup(from);
char_conv_use_iconv = 1;
return 0; return 0;
#endif #endif
return -1; return -1;
} }
char * convStrDup(char * string) { char * convStrDup(char * string) {
#ifdef HAVE_ICONV
char buffer[BUFFER_SIZE];
size_t inleft = strlen(string);
char * ret;
size_t outleft;
size_t retlen = 0;
size_t err;
char * bufferPtr;
if(!char_conv_to) return NULL; if(!char_conv_to) return NULL;
ret = malloc(1); if(char_conv_same) return strdup(string);
ret[0] = '\0';
while(inleft) { #ifdef HAVE_ICONV
bufferPtr = buffer; if(char_conv_use_iconv) {
outleft = BUFFER_SIZE; char buffer[BUFFER_SIZE];
err = iconv(char_conv_iconv,&string,&inleft,&bufferPtr, size_t inleft = strlen(string);
char * ret;
size_t outleft;
size_t retlen = 0;
size_t err;
char * bufferPtr;
ret = malloc(1);
ret[0] = '\0';
while(inleft) {
bufferPtr = buffer;
outleft = BUFFER_SIZE;
err = iconv(char_conv_iconv,&string,&inleft,&bufferPtr,
&outleft); &outleft);
if(outleft==BUFFER_SIZE || (err<0 && errno!=E2BIG)) { if(outleft==BUFFER_SIZE || (err<0 && errno!=E2BIG)) {
free(ret); free(ret);
return NULL; return NULL;
}
ret = realloc(ret,retlen+BUFFER_SIZE-outleft+1);
memcpy(ret+retlen,buffer,BUFFER_SIZE-outleft);
retlen+=BUFFER_SIZE-outleft;
ret[retlen] = '\0';
} }
ret = realloc(ret,retlen+BUFFER_SIZE-outleft+1); return ret;
memcpy(ret+retlen,buffer,BUFFER_SIZE-outleft); }
retlen+=BUFFER_SIZE-outleft; #endif
ret[retlen] = '\0';
switch(char_conv_asciiToUtf8) {
case 1:
return asciiStrToUtf8Dup(string);
break;
case -1:
return utf8StrToAsciiDup(string);
break;
} }
return ret;
#endif
return NULL; return NULL;
} }
void closeCharSetConversion() { void closeCharSetConversion() {
#ifdef HAVE_ICONV
if(char_conv_to) { if(char_conv_to) {
iconv_close(char_conv_iconv); #ifdef HAVE_ICONV
if(char_conv_use_iconv) iconv_close(char_conv_iconv);
#endif
free(char_conv_to); free(char_conv_to);
free(char_conv_from); free(char_conv_from);
char_conv_to = NULL; char_conv_to = NULL;
char_conv_from = NULL; char_conv_from = NULL;
char_conv_same = 0;
char_conv_asciiToUtf8 = 0;
char_conv_use_iconv = 0;
} }
#endif
} }

View File

@ -310,14 +310,18 @@ int removeDeletedFromDirectory(Directory * directory) {
while((ent = readdir(dir))) { while((ent = readdir(dir))) {
if(ent->d_name[0]=='.') continue; /* hide hidden stuff */ if(ent->d_name[0]=='.') continue; /* hide hidden stuff */
utf8 = strdup(fsCharsetToUtf8(ent->d_name)); utf8 = fsCharsetToUtf8(ent->d_name);
if(!utf8) continue;
utf8 = strdup(utf8);
if(directory->utf8name) { if(directory->utf8name) {
s = malloc(strlen(directory->utf8name)+strlen(utf8)+2); s = malloc(strlen(directory->utf8name)+strlen(utf8)+2);
sprintf(s,"%s/%s",directory->utf8name,utf8); sprintf(s,"%s/%s",directory->utf8name,utf8);
} }
else s= strdup(utf8); else s= strdup(utf8);
insertInList(entList,fsCharsetToUtf8(ent->d_name),s); insertInList(entList,utf8,s);
free(utf8); free(utf8);
} }
@ -377,7 +381,11 @@ int updateDirectory(Directory * directory) {
while((ent = readdir(dir))) { while((ent = readdir(dir))) {
if(ent->d_name[0]=='.') continue; /* hide hidden stuff */ if(ent->d_name[0]=='.') continue; /* hide hidden stuff */
utf8 = strdup(fsCharsetToUtf8(ent->d_name)); utf8 = fsCharsetToUtf8(ent->d_name);
if(!utf8) continue;
utf8 = strdup(utf8);
if(directory->utf8name) { if(directory->utf8name) {
s = malloc(strlen(directory->utf8name)+strlen(utf8)+2); s = malloc(strlen(directory->utf8name)+strlen(utf8)+2);
@ -415,7 +423,11 @@ int exploreDirectory(Directory * directory) {
while((ent = readdir(dir))) { while((ent = readdir(dir))) {
if(ent->d_name[0]=='.') continue; /* hide hidden stuff */ if(ent->d_name[0]=='.') continue; /* hide hidden stuff */
utf8 = strdup(fsCharsetToUtf8(ent->d_name)); utf8 = fsCharsetToUtf8(ent->d_name);
if(!utf8) continue;
utf8 = strdup(utf8);
DEBUG("explore: found: %s (%s)\n",ent->d_name,utf8); DEBUG("explore: found: %s (%s)\n",ent->d_name,utf8);

View File

@ -46,6 +46,7 @@ int lsPlaylists(FILE * fp, char * utf8path) {
struct stat st; struct stat st;
struct dirent * ent; struct dirent * ent;
char * dup; char * dup;
char * utf8;
char s[MAXPATHLEN+1]; char s[MAXPATHLEN+1];
List * list = NULL; List * list = NULL;
ListNode * node = NULL; ListNode * node = NULL;
@ -78,8 +79,9 @@ int lsPlaylists(FILE * fp, char * utf8path) {
if(list==NULL) list = makeList(NULL); if(list==NULL) list = makeList(NULL);
dup = strdup(ent->d_name); dup = strdup(ent->d_name);
dup[suff] = '\0'; dup[suff] = '\0';
insertInList(list, if((utf8 = fsCharsetToUtf8(dup))) {
fsCharsetToUtf8(dup),NULL); insertInList(list,utf8,NULL);
}
free(dup); free(dup);
} }
} }

View File

@ -20,6 +20,7 @@
#include "log.h" #include "log.h"
#include "charConv.h" #include "charConv.h"
#include "conf.h" #include "conf.h"
#include "utf8.h"
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -46,24 +47,35 @@ char * pathConvCharset(char * to, char * from, char * str, char * ret) {
ret = convStrDup(str); ret = convStrDup(str);
} }
if(!ret) ret = strdup(str);
return ret; return ret;
} }
char * fsCharsetToUtf8(char * str) { char * fsCharsetToUtf8(char * str) {
static char * ret = NULL; static char * ret = NULL;
return ret = pathConvCharset("UTF-8",fsCharset,str,ret); ret = pathConvCharset("UTF-8",fsCharset,str,ret);
if(ret && !validUtf8String(ret)) ret = NULL;
/*if(!ret) ret = asciiStrToUtf8Dup(str);*/
/* if all else fails, just strdup */
return ret;
} }
char * utf8ToFsCharset(char * str) { char * utf8ToFsCharset(char * str) {
static char * ret = NULL; static char * ret = NULL;
return ret = pathConvCharset(fsCharset,"UTF-8",str,ret); ret = pathConvCharset(fsCharset,"UTF-8",str,ret);
if(!ret) ret = strdup(str);
return ret;
} }
void setFsCharset(char * charset) { void setFsCharset(char * charset) {
int error = 0;
if(fsCharset) free(fsCharset); if(fsCharset) free(fsCharset);
fsCharset = strdup(charset); fsCharset = strdup(charset);
@ -74,11 +86,19 @@ void setFsCharset(char * charset) {
ERROR("fs charset conversion problem: " ERROR("fs charset conversion problem: "
"not able to convert from \"%s\" to \"%s\"\n", "not able to convert from \"%s\" to \"%s\"\n",
fsCharset,"UTF-8"); fsCharset,"UTF-8");
error = 1;
} }
if(setCharSetConversion(fsCharset,"UTF-8")!=0) { if(setCharSetConversion(fsCharset,"UTF-8")!=0) {
ERROR("fs charset conversion problem: " ERROR("fs charset conversion problem: "
"not able to convert from \"%s\" to \"%s\"\n", "not able to convert from \"%s\" to \"%s\"\n",
"UTF-8",fsCharset); "UTF-8",fsCharset);
error = 1;
}
if(error) {
free(fsCharset);
ERROR("setting fs charset to ISO-8859-1!\n");
fsCharset = strdup("ISO-8859-1");
} }
} }

View File

@ -1122,7 +1122,9 @@ int loadPlaylist(FILE * fp, char * utf8file) {
free(temp); free(temp);
} }
slength = 0; slength = 0;
temp = strdup(fsCharsetToUtf8(s)); temp = fsCharsetToUtf8(s);
if(!temp) continue;
temp = strdup(temp);
if(s[0]==PLAYLIST_COMMENT && !getSong(temp)) { if(s[0]==PLAYLIST_COMMENT && !getSong(temp)) {
free(temp); free(temp);
continue; continue;

View File

@ -85,3 +85,28 @@ int validUtf8String(unsigned char * string) {
return 1; return 1;
} }
/* Convert a NUL-terminated UTF-8 string into a newly allocated string with
 * one output byte per input UTF-8 character.
 *
 * Each character is checked with validateUtf8Char() and converted with
 * utf8ToAscii().
 *
 * Returns a heap-allocated, NUL-terminated string that the caller must
 * free(), or NULL if the input contains a sequence validateUtf8Char()
 * rejects, or if the working buffer cannot be allocated.
 */
unsigned char * utf8StrToAsciiDup(unsigned char * utf8) {
	/* every utf8 char takes at least one byte and yields exactly one
	   output byte, so the output never outgrows the input */
	size_t size = strlen((char *)utf8)+1;
	size_t len = 0;
	unsigned char * ret = malloc(size);
	unsigned char * shrunk;
	int count;

	if(!ret) return NULL;

	while(*utf8) {
		count = validateUtf8Char(utf8);
		if(!count) {
			free(ret);
			return NULL;
		}
		ret[len++] = utf8ToAscii(utf8);
		utf8+= count;
	}
	ret[len] = '\0';

	/* trim the buffer to the converted length; if the shrink fails the
	   original buffer is still valid and terminated, so hand that back
	   instead of leaking it and reporting a bogus conversion failure */
	shrunk = realloc(ret,len+1);
	return shrunk ? shrunk : ret;
}

View File

@ -5,6 +5,8 @@ unsigned char * asciiToUtf8(unsigned char c);
unsigned char * asciiStrToUtf8Dup(unsigned char * ascii); unsigned char * asciiStrToUtf8Dup(unsigned char * ascii);
unsigned char * utf8StrToAsciiDup(unsigned char * utf8);
unsigned char utf8ToAscii(unsigned char * utf8); unsigned char utf8ToAscii(unsigned char * utf8);
int validateUtf8Char(unsigned char * utf8Char); int validateUtf8Char(unsigned char * utf8Char);