lib/icu/Converter: add ICU-based backend

This commit is contained in:
Max Kellermann 2014-11-30 00:59:43 +01:00
parent 4658bd8208
commit bc00c38f9d
4 changed files with 119 additions and 4 deletions

View File

@ -465,7 +465,11 @@ libicu_a_CPPFLAGS = $(AM_CPPFLAGS) \
$(ICU_CFLAGS) $(ICU_CFLAGS)
ICU_LDADD = libicu.a $(ICU_LIBS) ICU_LDADD = libicu.a $(ICU_LIBS)
if HAVE_ICU
else
ICU_LDADD += $(GLIB_LIBS) ICU_LDADD += $(GLIB_LIBS)
endif
# PCM library # PCM library

View File

@ -25,7 +25,7 @@
#include <string> #include <string>
#ifdef HAVE_GLIB #if defined(HAVE_ICU) || defined(HAVE_GLIB)
#define HAVE_FS_CHARSET #define HAVE_FS_CHARSET
#endif #endif

View File

@ -19,20 +19,48 @@
#include "config.h" #include "config.h"
#include "Converter.hxx" #include "Converter.hxx"
#include "Error.hxx"
#include "util/Error.hxx" #include "util/Error.hxx"
#include "util/Domain.hxx" #include "util/Macros.hxx"
#include "util/WritableBuffer.hxx"
#include "util/ConstBuffer.hxx"
#include <string.h> #include <string.h>
#ifdef HAVE_GLIB #ifdef HAVE_ICU
#include "Util.hxx"
#include <unicode/ucnv.h>
#elif defined(HAVE_GLIB)
#include "util/Domain.hxx"
static constexpr Domain g_iconv_domain("g_iconv"); static constexpr Domain g_iconv_domain("g_iconv");
#endif #endif
#ifdef HAVE_ICU
/**
 * Destructor for the ICU build: hands the UConverter handle stored
 * in this object to ucnv_close().
 */
IcuConverter::~IcuConverter()
{
ucnv_close(converter);
}
#endif
#ifdef HAVE_ICU_CONVERTER #ifdef HAVE_ICU_CONVERTER
IcuConverter * IcuConverter *
IcuConverter::Create(const char *charset, Error &error) IcuConverter::Create(const char *charset, Error &error)
{ {
#ifdef HAVE_ICU
UErrorCode code = U_ZERO_ERROR;
UConverter *converter = ucnv_open(charset, &code);
if (converter == nullptr) {
error.Format(icu_domain, int(code),
"Failed to initialize charset '%s': %s",
charset, u_errorName(code));
return nullptr;
}
return new IcuConverter(converter);
#elif defined(HAVE_GLIB)
GIConv to = g_iconv_open("utf-8", charset); GIConv to = g_iconv_open("utf-8", charset);
GIConv from = g_iconv_open(charset, "utf-8"); GIConv from = g_iconv_open(charset, "utf-8");
if (to == (GIConv)-1 || from == (GIConv)-1) { if (to == (GIConv)-1 || from == (GIConv)-1) {
@ -46,8 +74,12 @@ IcuConverter::Create(const char *charset, Error &error)
} }
return new IcuConverter(to, from); return new IcuConverter(to, from);
#endif
} }
#ifdef HAVE_ICU
#elif defined(HAVE_GLIB)
static std::string static std::string
DoConvert(GIConv conv, const char *src) DoConvert(GIConv conv, const char *src)
{ {
@ -66,16 +98,72 @@ DoConvert(GIConv conv, const char *src)
return std::string(buffer, sizeof(buffer) - out_left); return std::string(buffer, sizeof(buffer) - out_left);
} }
#endif
std::string std::string
IcuConverter::ToUTF8(const char *s) const IcuConverter::ToUTF8(const char *s) const
{ {
#ifdef HAVE_ICU
const ScopeLock protect(mutex);
ucnv_resetToUnicode(converter);
// TODO: dynamic buffer?
UChar buffer[4096], *target = buffer;
const char *source = s;
UErrorCode code = U_ZERO_ERROR;
ucnv_toUnicode(converter, &target, buffer + ARRAY_SIZE(buffer),
&source, source + strlen(source),
nullptr, true, &code);
if (code != U_ZERO_ERROR)
return std::string();
const size_t target_length = target - buffer;
const auto u = UCharToUTF8({buffer, target_length});
if (u.IsNull())
return std::string();
std::string result(u.data, u.size);
delete[] u.data;
return result;
#elif defined(HAVE_GLIB)
return DoConvert(to_utf8, s); return DoConvert(to_utf8, s);
#endif
} }
std::string std::string
IcuConverter::FromUTF8(const char *s) const IcuConverter::FromUTF8(const char *s) const
{ {
#ifdef HAVE_ICU
const ScopeLock protect(mutex);
const auto u = UCharFromUTF8(s);
if (u.IsNull())
return std::string();
ucnv_resetFromUnicode(converter);
// TODO: dynamic buffer?
char buffer[4096], *target = buffer;
const UChar *source = u.data;
UErrorCode code = U_ZERO_ERROR;
ucnv_fromUnicode(converter, &target, buffer + ARRAY_SIZE(buffer),
&source, u.end(),
nullptr, true, &code);
delete[] u.data;
if (code != U_ZERO_ERROR)
return std::string();
return std::string(buffer, target);
#elif defined(HAVE_GLIB)
return DoConvert(from_utf8, s); return DoConvert(from_utf8, s);
#endif
} }
#endif #endif

View File

@ -23,7 +23,10 @@
#include "check.h" #include "check.h"
#include "Compiler.h" #include "Compiler.h"
#ifdef HAVE_GLIB #ifdef HAVE_ICU
#include "thread/Mutex.hxx"
#define HAVE_ICU_CONVERTER
#elif defined(HAVE_GLIB)
#include <glib.h> #include <glib.h>
#define HAVE_ICU_CONVERTER #define HAVE_ICU_CONVERTER
#endif #endif
@ -34,21 +37,41 @@
class Error; class Error;
#ifdef HAVE_ICU
struct UConverter;
#endif
/** /**
* This class can convert strings with a certain character set to and * This class can convert strings with a certain character set to and
* from UTF-8. * from UTF-8.
*/ */
class IcuConverter { class IcuConverter {
#ifdef HAVE_ICU
/**
* ICU's UConverter class is not thread-safe. This mutex
* serializes simultaneous calls.
*/
mutable Mutex mutex;
UConverter *const converter;
IcuConverter(UConverter *_converter):converter(_converter) {}
#elif defined(HAVE_GLIB)
const GIConv to_utf8, from_utf8; const GIConv to_utf8, from_utf8;
IcuConverter(GIConv _to, GIConv _from) IcuConverter(GIConv _to, GIConv _from)
:to_utf8(_to), from_utf8(_from) {} :to_utf8(_to), from_utf8(_from) {}
#endif
public: public:
#ifdef HAVE_ICU
~IcuConverter();
#elif defined(HAVE_GLIB)
~IcuConverter() { ~IcuConverter() {
g_iconv_close(to_utf8); g_iconv_close(to_utf8);
g_iconv_close(from_utf8); g_iconv_close(from_utf8);
} }
#endif
static IcuConverter *Create(const char *charset, Error &error); static IcuConverter *Create(const char *charset, Error &error);