From 1004890e253453faba81126028986f075e5fc5e7 Mon Sep 17 00:00:00 2001
From: Warren Dukes <warren.dukes@gmail.com>
Date: Tue, 13 Apr 2004 04:59:57 +0000
Subject: [PATCH] Lots of fsCharset and utf8/ascii conversion clean-up and
 robustness fixes. Also, if fsCharsetToUtf8 can't convert a name to valid
 UTF-8, don't add it to the db; this way clients don't have to worry about
 weirdness, and it will force people to convert such names.

git-svn-id: https://svn.musicpd.org/mpd/trunk@711 09075e82-0dd4-0310-85a5-a0d7c8717e4f
---
 src/charConv.c  | 107 +++++++++++++++++++++++++++++++++++-------------
 src/directory.c |  20 +++++++--
 src/ls.c        |   6 ++-
 src/path.c      |  28 +++++++++++--
 src/playlist.c  |   4 +-
 src/utf8.c      |  25 +++++++++++
 src/utf8.h      |   2 +
 7 files changed, 152 insertions(+), 40 deletions(-)

diff --git a/src/charConv.c b/src/charConv.c
index 6d905bccc..402683e18 100644
--- a/src/charConv.c
+++ b/src/charConv.c
@@ -17,6 +17,8 @@
  */
 
 #include "charConv.h"
+#include "mpd_types.h"
+#include "utf8.h"
 
 #include <stdlib.h>
 #include <errno.h>
@@ -25,14 +27,21 @@
 #ifdef HAVE_ICONV
 #include <iconv.h>
 iconv_t char_conv_iconv;
+#endif
+
 char * char_conv_to = NULL;
 char * char_conv_from = NULL;
-#endif
+mpd_sint8 char_conv_same = 0;
+mpd_sint8 char_conv_use_iconv = 0;
+
+/* 1: convert with asciiStrToUtf8Dup (from ISO-8859-1 to UTF-8)
+   0: do not use the built-in ascii/utf8 converter
+  -1: convert with utf8StrToAsciiDup (from UTF-8 to ISO-8859-1) */
+mpd_sint8 char_conv_asciiToUtf8 = 0;
 
 #define BUFFER_SIZE	1024
 
 int setCharSetConversion(char * to, char * from) {
-#ifdef HAVE_ICONV
 	if(char_conv_to && strcmp(to,char_conv_to)==0 &&
 			char_conv_from && strcmp(from,char_conv_from)==0)
 	{ 
@@ -41,60 +50,100 @@ int setCharSetConversion(char * to, char * from) {
 
 	closeCharSetConversion();
 
+	if(0==strcmp(to,from)) {
+		char_conv_same = 1;
+		char_conv_to = strdup(to);
+		char_conv_from = strdup(from);
+		return 0;
+	}
+
+	if(strcmp(to,"UTF-8")==0 && strcmp(from,"ISO-8859-1")==0) {
+		char_conv_asciiToUtf8 = 1;
+	}
+	else if(strcmp(to,"ISO-8859-1")==0 && strcmp(from,"UTF-8")==0) {
+		char_conv_asciiToUtf8 = -1;
+	}
+
+	if(char_conv_asciiToUtf8!=0) {
+		char_conv_to = strdup(to);
+		char_conv_from = strdup(from);
+		return 0;
+	}
+
+#ifdef HAVE_ICONV
 	if((char_conv_iconv = iconv_open(to,from))==(iconv_t)(-1)) return -1;
 
 	char_conv_to = strdup(to);
 	char_conv_from = strdup(from);
+	char_conv_use_iconv = 1;
 
 	return 0;
 #endif
+
 	return -1;
 }
 
 char * convStrDup(char * string) {
-#ifdef HAVE_ICONV
-	char buffer[BUFFER_SIZE];
-	size_t inleft = strlen(string);
-	char * ret;
-	size_t outleft;
-	size_t retlen = 0;
-	size_t err;
-	char * bufferPtr;
-
 	if(!char_conv_to) return NULL;
 
-	ret = malloc(1);
-	ret[0] = '\0';
+	if(char_conv_same) return strdup(string);
 
-	while(inleft) {
-		bufferPtr = buffer;
-		outleft = BUFFER_SIZE;
-		err = iconv(char_conv_iconv,&string,&inleft,&bufferPtr,
+#ifdef HAVE_ICONV
+	if(char_conv_use_iconv) {
+		char buffer[BUFFER_SIZE];
+		size_t inleft = strlen(string);
+		char * ret;
+		size_t outleft;
+		size_t retlen = 0;
+		size_t err;
+		char * bufferPtr;
+
+		ret = malloc(1);
+		ret[0] = '\0';
+
+		while(inleft) {
+			bufferPtr = buffer;
+			outleft = BUFFER_SIZE;
+			err = iconv(char_conv_iconv,&string,&inleft,&bufferPtr,
 					&outleft);
-		if(outleft==BUFFER_SIZE || (err<0 && errno!=E2BIG)) {
-			free(ret);
-			return NULL;
+			if(outleft==BUFFER_SIZE || (err<0 && errno!=E2BIG)) {
+				free(ret);
+				return NULL;
+			}
+
+			ret = realloc(ret,retlen+BUFFER_SIZE-outleft+1);
+			memcpy(ret+retlen,buffer,BUFFER_SIZE-outleft);
+			retlen+=BUFFER_SIZE-outleft;
+			ret[retlen] = '\0';
 		}
 
-		ret = realloc(ret,retlen+BUFFER_SIZE-outleft+1);
-		memcpy(ret+retlen,buffer,BUFFER_SIZE-outleft);
-		retlen+=BUFFER_SIZE-outleft;
-		ret[retlen] = '\0';
+		return ret;
+	}
+#endif
+
+	switch(char_conv_asciiToUtf8) {
+	case 1: 
+		return asciiStrToUtf8Dup(string);
+		break;
+	case -1:
+		return utf8StrToAsciiDup(string);
+		break;
 	}
 
-	return ret;
-#endif
 	return NULL;
 }
 
 void closeCharSetConversion() {
-#ifdef HAVE_ICONV
 	if(char_conv_to) {
-		iconv_close(char_conv_iconv);
+#ifdef HAVE_ICONV
+		if(char_conv_use_iconv) iconv_close(char_conv_iconv);
+#endif
 		free(char_conv_to);
 		free(char_conv_from);
 		char_conv_to = NULL;
 		char_conv_from = NULL;
+		char_conv_same = 0;
+		char_conv_asciiToUtf8 = 0;
+		char_conv_use_iconv = 0;
 	}
-#endif
 }
diff --git a/src/directory.c b/src/directory.c
index 6b1ed9441..b4634c42f 100644
--- a/src/directory.c
+++ b/src/directory.c
@@ -310,14 +310,18 @@ int removeDeletedFromDirectory(Directory * directory) {
 	while((ent = readdir(dir))) {
 		if(ent->d_name[0]=='.') continue; /* hide hidden stuff */
 
-		utf8 = strdup(fsCharsetToUtf8(ent->d_name));
+		utf8 = fsCharsetToUtf8(ent->d_name);
+
+		if(!utf8) continue;
+
+		utf8 = strdup(utf8);
 
 		if(directory->utf8name) {
 			s = malloc(strlen(directory->utf8name)+strlen(utf8)+2);
 			sprintf(s,"%s/%s",directory->utf8name,utf8);
 		}
 		else s= strdup(utf8);
-		insertInList(entList,fsCharsetToUtf8(ent->d_name),s);
+		insertInList(entList,utf8,s);
 		free(utf8);
 	}
 
@@ -377,7 +381,11 @@ int updateDirectory(Directory * directory) {
 	while((ent = readdir(dir))) {
 		if(ent->d_name[0]=='.') continue; /* hide hidden stuff */
 
-		utf8 = strdup(fsCharsetToUtf8(ent->d_name));
+		utf8 = fsCharsetToUtf8(ent->d_name);
+
+		if(!utf8) continue;
+
+		utf8 = strdup(utf8);
 
 		if(directory->utf8name) {
 			s = malloc(strlen(directory->utf8name)+strlen(utf8)+2);
@@ -415,7 +423,11 @@ int exploreDirectory(Directory * directory) {
 	while((ent = readdir(dir))) {
 		if(ent->d_name[0]=='.') continue; /* hide hidden stuff */
 
-		utf8 = strdup(fsCharsetToUtf8(ent->d_name));
+		utf8 = fsCharsetToUtf8(ent->d_name);
+
+		if(!utf8) continue;
+
+		utf8 = strdup(utf8);
 
 		DEBUG("explore: found: %s (%s)\n",ent->d_name,utf8);
 
diff --git a/src/ls.c b/src/ls.c
index a47f64d74..d461109d5 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -46,6 +46,7 @@ int lsPlaylists(FILE * fp, char * utf8path) {
 	struct stat st;
 	struct dirent * ent;
 	char * dup;
+	char * utf8;
 	char s[MAXPATHLEN+1];
 	List * list = NULL;
 	ListNode * node = NULL;
@@ -78,8 +79,9 @@ int lsPlaylists(FILE * fp, char * utf8path) {
 					if(list==NULL) list = makeList(NULL);
 					dup = strdup(ent->d_name);
 					dup[suff] = '\0';
-					insertInList(list,
-						fsCharsetToUtf8(dup),NULL);
+					if((utf8 = fsCharsetToUtf8(dup))) {
+						insertInList(list,utf8,NULL);
+					}
 					free(dup);
 				}
 			}
diff --git a/src/path.c b/src/path.c
index e209b671d..db41ed968 100644
--- a/src/path.c
+++ b/src/path.c
@@ -20,6 +20,7 @@
 #include "log.h"
 #include "charConv.h"
 #include "conf.h"
+#include "utf8.h"
 
 #include <stdlib.h>
 #include <string.h>
@@ -46,24 +47,35 @@ char * pathConvCharset(char * to, char * from, char * str, char * ret) {
 		ret = convStrDup(str);
 	}
 
-	if(!ret) ret = strdup(str);
-
 	return ret;
 }
 
 char * fsCharsetToUtf8(char * str) {
 	static char * ret = NULL;
 
-	return ret = pathConvCharset("UTF-8",fsCharset,str,ret);
+	ret = pathConvCharset("UTF-8",fsCharset,str,ret);
+
+	if(ret && !validUtf8String(ret)) ret = NULL;
+	/*if(!ret) ret = asciiStrToUtf8Dup(str);*/
+
+	/* no strdup fallback here: callers must handle a NULL return */
+
+	return ret;
 }
 
 char * utf8ToFsCharset(char * str) {
 	static char * ret = NULL;
 
-	return ret = pathConvCharset(fsCharset,"UTF-8",str,ret);
+	ret = pathConvCharset(fsCharset,"UTF-8",str,ret);
+
+	if(!ret) ret = strdup(str);
+
+	return ret;
 }
 
 void setFsCharset(char * charset) {
+	int error = 0;
+
 	if(fsCharset) free(fsCharset);
 
 	fsCharset = strdup(charset);
@@ -74,11 +86,19 @@ void setFsCharset(char * charset) {
 		ERROR("fs charset conversion problem: "
 			"not able to convert from \"%s\" to \"%s\"\n",
 			fsCharset,"UTF-8");
+		error = 1;
 	}
 	if(setCharSetConversion(fsCharset,"UTF-8")!=0) {
 		ERROR("fs charset conversion problem: "
 			"not able to convert from \"%s\" to \"%s\"\n",
 			"UTF-8",fsCharset);
+		error = 1;
+	}
+	
+	if(error) {
+		free(fsCharset);
+		ERROR("setting fs charset to ISO-8859-1!\n");
+		fsCharset = strdup("ISO-8859-1");
 	}
 }
 
diff --git a/src/playlist.c b/src/playlist.c
index 471c1b08e..cf064047f 100644
--- a/src/playlist.c
+++ b/src/playlist.c
@@ -1122,7 +1122,9 @@ int loadPlaylist(FILE * fp, char * utf8file) {
 				free(temp);
 			}
 			slength = 0;
-			temp = strdup(fsCharsetToUtf8(s));
+			temp = fsCharsetToUtf8(s);
+			if(!temp) continue;
+			temp = strdup(temp);
 			if(s[0]==PLAYLIST_COMMENT && !getSong(temp)) {
 				free(temp);
 				continue;
diff --git a/src/utf8.c b/src/utf8.c
index 140316150..4b8814a80 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -85,3 +85,28 @@ int validUtf8String(unsigned char * string) {
 
 	return 1;
 }
+
+/* Convert UTF-8 to a malloc'd string with one byte per character, using
+ * utf8ToAscii(); returns NULL on invalid UTF-8 or failed allocation. */
+unsigned char * utf8StrToAsciiDup(unsigned char * utf8) {
+	/* one output byte per char; assumes count is the char's byte length */
+	unsigned char * ret = malloc(strlen((char *)utf8)+1);
+	unsigned char * shrunk;
+	size_t len = 0;
+	int count;
+
+	if(!ret) return NULL;
+	while(*utf8) {
+		count = validateUtf8Char(utf8);
+		if(!count) {
+			free(ret);
+			return NULL;
+		}
+		ret[len++] = utf8ToAscii(utf8);
+		utf8+= count;
+	}
+	ret[len] = '\0';
+	/* shrinking is only an optimization: keep the buffer if it fails */
+	shrunk = realloc(ret,len+1);
+	return shrunk ? shrunk : ret;
+}
diff --git a/src/utf8.h b/src/utf8.h
index 1928a8a81..bf8f1a9c9 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -5,6 +5,8 @@ unsigned char * asciiToUtf8(unsigned char c);
 
 unsigned char * asciiStrToUtf8Dup(unsigned char * ascii);
 
+unsigned char * utf8StrToAsciiDup(unsigned char * utf8);
+
 unsigned char utf8ToAscii(unsigned char * utf8);
 
 int validateUtf8Char(unsigned char * utf8Char);