fs/Charset: move code to wrapper class IcuConverter

Prepare for an ICU-based backend without GLib.
This commit is contained in:
Max Kellermann 2014-11-29 23:59:37 +01:00
parent 4f80a129f1
commit 87c88fcb27
4 changed files with 183 additions and 65 deletions

View File

@ -57,9 +57,9 @@ src_mpd_LDADD = \
libevent.a \ libevent.a \
libthread.a \ libthread.a \
libsystem.a \ libsystem.a \
$(ICU_LDADD) \
libutil.a \ libutil.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
$(SYSTEMD_DAEMON_LIBS) \ $(SYSTEMD_DAEMON_LIBS) \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -452,6 +452,7 @@ libevent_a_SOURCES = \
libicu_a_SOURCES = \ libicu_a_SOURCES = \
src/lib/icu/Collate.cxx src/lib/icu/Collate.hxx \ src/lib/icu/Collate.cxx src/lib/icu/Collate.hxx \
src/lib/icu/Converter.cxx src/lib/icu/Converter.hxx \
src/lib/icu/Error.cxx src/lib/icu/Error.hxx src/lib/icu/Error.cxx src/lib/icu/Error.hxx
if HAVE_ICU if HAVE_ICU
@ -1542,6 +1543,7 @@ endif
test_read_conf_LDADD = \ test_read_conf_LDADD = \
libconf.a \ libconf.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -1589,6 +1591,7 @@ endif
test_run_storage_LDADD = \ test_run_storage_LDADD = \
$(STORAGE_LIBS) \ $(STORAGE_LIBS) \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libevent.a \ libevent.a \
libthread.a \ libthread.a \
libsystem.a \ libsystem.a \
@ -1611,6 +1614,7 @@ test_run_input_LDADD = \
libevent.a \ libevent.a \
libthread.a \ libthread.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
$(GLIB_LIBS) $(GLIB_LIBS)
test_run_input_SOURCES = test/run_input.cxx \ test_run_input_SOURCES = test/run_input.cxx \
@ -1635,6 +1639,7 @@ test_run_neighbor_explorer_LDADD = $(AM_LDADD) \
libconf.a \ libconf.a \
libevent.a \ libevent.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libthread.a \ libthread.a \
libutil.a libutil.a
@ -1662,6 +1667,7 @@ test_visit_archive_LDADD = \
libevent.a \ libevent.a \
libthread.a \ libthread.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
$(GLIB_LIBS) $(GLIB_LIBS)
test_visit_archive_SOURCES = test/visit_archive.cxx \ test_visit_archive_SOURCES = test/visit_archive.cxx \
@ -1687,6 +1693,7 @@ test_run_gunzip_LDADD = \
$(GLIB_LIBS) \ $(GLIB_LIBS) \
libutil.a \ libutil.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a libsystem.a
endif endif
@ -1698,6 +1705,7 @@ test_dump_text_file_LDADD = \
libconf.a \ libconf.a \
libevent.a \ libevent.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libthread.a \ libthread.a \
libutil.a \ libutil.a \
@ -1719,6 +1727,7 @@ test_dump_playlist_LDADD = \
libevent.a \ libevent.a \
libthread.a \ libthread.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
libpcm.a \ libpcm.a \
@ -1750,6 +1759,7 @@ test_run_decoder_LDADD = \
libevent.a \ libevent.a \
libthread.a \ libthread.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -1776,6 +1786,7 @@ test_read_tags_LDADD = \
libevent.a \ libevent.a \
libthread.a \ libthread.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -1802,6 +1813,7 @@ test_run_filter_LDADD = \
$(FILTER_LIBS) \ $(FILTER_LIBS) \
libconf.a \ libconf.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -1830,6 +1842,7 @@ test_run_encoder_LDADD = \
libpcm.a \ libpcm.a \
libthread.a \ libthread.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -1852,6 +1865,7 @@ test_test_vorbis_encoder_LDADD = $(MPD_LIBS) \
$(TAG_LIBS) \ $(TAG_LIBS) \
libconf.a \ libconf.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -1911,6 +1925,7 @@ test_run_output_LDADD = $(MPD_LIBS) \
libconf.a \ libconf.a \
libevent.a \ libevent.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libthread.a \ libthread.a \
libutil.a \ libutil.a \
@ -1940,6 +1955,7 @@ test_read_mixer_LDADD = \
libconf.a \ libconf.a \
libevent.a \ libevent.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) $(GLIB_LIBS)
@ -2075,6 +2091,7 @@ test_test_translate_song_LDADD = \
$(STORAGE_LIBS) \ $(STORAGE_LIBS) \
libtag.a \ libtag.a \
$(FS_LIBS) \ $(FS_LIBS) \
$(ICU_LDADD) \
libsystem.a \ libsystem.a \
libutil.a \ libutil.a \
$(GLIB_LIBS) \ $(GLIB_LIBS) \

View File

@ -23,12 +23,8 @@
#include "Limits.hxx" #include "Limits.hxx"
#include "Log.hxx" #include "Log.hxx"
#include "Traits.hxx" #include "Traits.hxx"
#include "lib/icu/Converter.hxx"
#include "util/Error.hxx" #include "util/Error.hxx"
#include "util/Domain.hxx"
#ifdef HAVE_GLIB
#include <glib.h>
#endif
#include <algorithm> #include <algorithm>
@ -37,49 +33,19 @@
#ifdef HAVE_FS_CHARSET #ifdef HAVE_FS_CHARSET
static constexpr Domain convert_domain("convert");
/**
* Maximal number of bytes required to represent path name in UTF-8
* (including nul-terminator).
* This value is a rough estimate of the upper bound.
* It's based on path name limit in bytes (MPD_PATH_MAX)
* and assumption that some weird encoding could represent some UTF-8 4 byte
* sequences with single byte.
*/
static constexpr size_t MPD_PATH_MAX_UTF8 = (MPD_PATH_MAX - 1) * 4 + 1;
static std::string fs_charset; static std::string fs_charset;
gcc_pure static IcuConverter *fs_converter;
static bool
CheckCharset(const char *charset, Error &error)
{
/* convert a space to check if the charset is valid */
GError *error2 = nullptr;
char *test = g_convert(" ", 1, charset, "UTF-8", nullptr, nullptr, &error2);
if (test == nullptr) {
error.Set(convert_domain, error2->code, error2->message);
g_error_free(error2);
return false;
}
g_free(test);
return true;
}
bool bool
SetFSCharset(const char *charset, Error &error) SetFSCharset(const char *charset, Error &error)
{ {
assert(charset != nullptr); assert(charset != nullptr);
assert(fs_converter == nullptr);
if (!CheckCharset(charset, error)) { fs_converter = IcuConverter::Create(charset, error);
error.FormatPrefix("Failed to initialize filesystem charset '%s': ", if (fs_converter == nullptr)
charset);
return false; return false;
}
fs_charset = charset;
FormatDebug(path_domain, FormatDebug(path_domain,
"SetFSCharset: fs charset is: %s", fs_charset.c_str()); "SetFSCharset: fs charset is: %s", fs_charset.c_str());
@ -91,6 +57,10 @@ SetFSCharset(const char *charset, Error &error)
void void
DeinitFSCharset() DeinitFSCharset()
{ {
#ifdef HAVE_ICU_CONVERTER
delete fs_converter;
fs_converter = nullptr;
#endif
} }
const char * const char *
@ -122,7 +92,7 @@ PathToUTF8(const char *path_fs)
assert(path_fs != nullptr); assert(path_fs != nullptr);
#ifdef HAVE_FS_CHARSET #ifdef HAVE_FS_CHARSET
if (fs_charset.empty()) { if (fs_converter == nullptr) {
#endif #endif
auto result = std::string(path_fs); auto result = std::string(path_fs);
FixSeparators(result); FixSeparators(result);
@ -130,26 +100,7 @@ PathToUTF8(const char *path_fs)
#ifdef HAVE_FS_CHARSET #ifdef HAVE_FS_CHARSET
} }
GIConv conv = g_iconv_open("utf-8", fs_charset.c_str()); auto result_path = fs_converter->ToUTF8(path_fs);
if (conv == reinterpret_cast<GIConv>(-1))
return std::string();
// g_iconv() does not need nul-terminator,
// std::string could be created without it too.
char path_utf8[MPD_PATH_MAX_UTF8 - 1];
char *in = const_cast<char *>(path_fs);
char *out = path_utf8;
size_t in_left = strlen(path_fs);
size_t out_left = sizeof(path_utf8);
size_t ret = g_iconv(conv, &in, &in_left, &out, &out_left);
g_iconv_close(conv);
if (ret == static_cast<size_t>(-1) || in_left > 0)
return std::string();
auto result_path = std::string(path_utf8, sizeof(path_utf8) - out_left);
FixSeparators(result_path); FixSeparators(result_path);
return result_path; return result_path;
#endif #endif
@ -162,12 +113,10 @@ PathFromUTF8(const char *path_utf8)
{ {
assert(path_utf8 != nullptr); assert(path_utf8 != nullptr);
if (fs_charset.empty()) if (fs_converter == nullptr)
return path_utf8; return path_utf8;
return g_convert(path_utf8, -1, return fs_converter->FromUTF8(path_utf8);
fs_charset.c_str(), "utf-8",
nullptr, nullptr, nullptr);
} }
#endif #endif

80
src/lib/icu/Converter.cxx Normal file
View File

@ -0,0 +1,80 @@
/*
* Copyright (C) 2003-2014 The Music Player Daemon Project
* http://www.musicpd.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "config.h"
#include "Converter.hxx"
#include "util/Error.hxx"
#include "util/Domain.hxx"
#include <string.h>
#ifdef HAVE_GLIB
static constexpr Domain g_iconv_domain("g_iconv");
#endif
#ifdef HAVE_ICU_CONVERTER
/**
 * Open one iconv descriptor per direction (charset -> UTF-8 and
 * UTF-8 -> charset) and wrap both in a newly allocated IcuConverter.
 *
 * @param charset name of the non-UTF-8 character set
 * @param error receives a message if either descriptor cannot be
 * opened
 * @return a converter allocated with "new", or nullptr on failure
 */
IcuConverter *
IcuConverter::Create(const char *charset, Error &error)
{
	/* g_iconv_open() signals failure with this value */
	const GIConv invalid = reinterpret_cast<GIConv>(-1);

	const GIConv to = g_iconv_open("utf-8", charset);
	const GIConv from = g_iconv_open(charset, "utf-8");

	if (to != invalid && from != invalid)
		return new IcuConverter(to, from);

	/* at least one direction is unavailable: release whichever
	   descriptor was opened successfully */
	if (to != invalid)
		g_iconv_close(to);

	if (from != invalid)
		g_iconv_close(from);

	error.Format(g_iconv_domain,
		     "Failed to initialize charset '%s'", charset);
	return nullptr;
}
/**
 * Convert a nul-terminated string with the given iconv descriptor.
 *
 * The descriptor is reused across calls, so its shift state is reset
 * before the conversion (an earlier failed call may have stopped in
 * the middle of a sequence) and flushed afterwards (stateful
 * encodings need a trailing sequence to return to the initial state).
 *
 * @param conv a descriptor obtained from g_iconv_open()
 * @param src the nul-terminated input string
 * @return the converted string; an empty string if the conversion
 * fails, if the input was not consumed completely, or if the result
 * does not fit into the fixed-size output buffer
 */
static std::string
DoConvert(GIConv conv, const char *src)
{
	/* discard whatever state a previous (failed) conversion left
	   behind in the shared descriptor */
	g_iconv(conv, nullptr, nullptr, nullptr, nullptr);

	char buffer[4096];
	char *in = const_cast<char *>(src);
	char *out = buffer;
	size_t in_left = strlen(src);
	size_t out_left = sizeof(buffer);

	size_t n = g_iconv(conv, &in, &in_left, &out, &out_left);
	if (n == static_cast<size_t>(-1) || in_left > 0)
		return std::string();

	/* a null input asks iconv to write the closing shift sequence
	   (if the target encoding has one) and return to the initial
	   state; stateless encodings write nothing here */
	n = g_iconv(conv, nullptr, nullptr, &out, &out_left);
	if (n == static_cast<size_t>(-1))
		return std::string();

	return std::string(buffer, sizeof(buffer) - out_left);
}
std::string
IcuConverter::ToUTF8(const char *s) const
{
return DoConvert(to_utf8, s);
}
/* Run the input through the descriptor that Create() opened for the
   UTF-8 -> charset direction. */
std::string
IcuConverter::FromUTF8(const char *s) const
{
	std::string converted = DoConvert(from_utf8, s);
	return converted;
}
#endif

72
src/lib/icu/Converter.hxx Normal file
View File

@ -0,0 +1,72 @@
/*
* Copyright (C) 2003-2014 The Music Player Daemon Project
* http://www.musicpd.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef MPD_ICU_CONVERTER_HXX
#define MPD_ICU_CONVERTER_HXX
#include "check.h"
#include "Compiler.h"
#ifdef HAVE_GLIB
#include <glib.h>
#define HAVE_ICU_CONVERTER
#endif
#ifdef HAVE_ICU_CONVERTER
#include <string>
class Error;
/**
 * This class can convert strings with a certain character set to and
 * from UTF-8.
 *
 * An instance owns two iconv descriptors which are closed by the
 * destructor.  Copying is disabled because a copy would close the
 * same descriptors a second time.
 */
class IcuConverter {
	/* one descriptor per conversion direction */
	const GIConv to_utf8, from_utf8;

	/* private: instances are obtained through Create() */
	IcuConverter(GIConv _to, GIConv _from)
		:to_utf8(_to), from_utf8(_from) {}

public:
	~IcuConverter() {
		g_iconv_close(to_utf8);
		g_iconv_close(from_utf8);
	}

	IcuConverter(const IcuConverter &) = delete;
	IcuConverter &operator=(const IcuConverter &) = delete;

	/**
	 * Create a converter for the given character set.
	 *
	 * @param charset name of the non-UTF-8 character set
	 * @param error receives the failure description
	 * @return a new instance (to be released with "delete"), or
	 * nullptr on error
	 */
	static IcuConverter *Create(const char *charset, Error &error);

	/**
	 * Convert the string to UTF-8.
	 * Returns empty string on error.
	 */
	gcc_pure gcc_nonnull_all
	std::string ToUTF8(const char *s) const;

	/**
	 * Convert the string from UTF-8.
	 * Returns empty string on error.
	 */
	gcc_pure gcc_nonnull_all
	std::string FromUTF8(const char *s) const;
};
#endif
#endif