icu/Collate: use u_strFoldCase() instead of ucol_getSortKey()

Turns out ucol_getSortKey() does not do what I thought it does.
This commit is contained in:
Max Kellermann 2014-05-12 14:35:25 +02:00
parent 317a98a5a9
commit 41507d8129

View File

@ -23,6 +23,7 @@
#ifdef HAVE_ICU #ifdef HAVE_ICU
#include "Error.hxx" #include "Error.hxx"
#include "util/WritableBuffer.hxx" #include "util/WritableBuffer.hxx"
#include "util/ConstBuffer.hxx"
#include "util/Error.hxx" #include "util/Error.hxx"
#include "util/Domain.hxx" #include "util/Domain.hxx"
@ -97,6 +98,28 @@ UCharFromUTF8(const char *src)
return { dest, size_t(dest_length) }; return { dest, size_t(dest_length) };
} }
/**
 * Convert a buffer of UChar code units to UTF-8 using ICU's
 * u_strToUTF8().
 *
 * @param src the source buffer; must not be "null" (asserted)
 * @return a buffer allocated with new[] plus the length reported by
 * u_strToUTF8(); the caller is responsible for freeing its data with
 * delete[].  A nullptr buffer is returned if ICU reports a failure.
 */
static WritableBuffer<char>
UCharToUTF8(ConstBuffer<UChar> src)
{
	assert(!src.IsNull());

	/* pessimistic upper bound: reserve four bytes for each
	   source code unit */
	const size_t capacity = src.size * 4;
	char *const utf8 = new char[capacity];

	int32_t utf8_length;
	UErrorCode status = U_ZERO_ERROR;
	u_strToUTF8(utf8, capacity, &utf8_length,
		    src.data, src.size,
		    &status);

	if (!U_FAILURE(status))
		return { utf8, size_t(utf8_length) };

	/* conversion failed: release the buffer before bailing out */
	delete[] utf8;
	return nullptr;
}
#endif #endif
gcc_pure gcc_pure
@ -147,19 +170,27 @@ IcuCaseFold(const char *src)
if (u.IsNull()) if (u.IsNull())
return std::string(src); return std::string(src);
size_t dest_length = ucol_getSortKey(collator, u.data, u.size, size_t folded_capacity = u.size * 2u;
nullptr, 0); UChar *folded = new UChar[folded_capacity];
if (dest_length == 0) {
UErrorCode error_code = U_ZERO_ERROR;
size_t folded_length = u_strFoldCase(folded, folded_capacity,
u.data, u.size,
U_FOLD_CASE_DEFAULT,
&error_code);
delete[] u.data; delete[] u.data;
if (folded_length == 0 || error_code != U_ZERO_ERROR) {
delete[] folded;
return std::string(src); return std::string(src);
} }
uint8_t *dest = new uint8_t[dest_length]; auto result2 = UCharToUTF8({folded, folded_length});
ucol_getSortKey(collator, u.data, u.size, delete[] folded;
dest, dest_length); if (result2.IsNull())
delete[] u.data; return std::string(src);
std::string result((const char *)dest);
delete[] dest; std::string result(result2.data, result2.size);
delete[] result2.data;
#elif defined(HAVE_GLIB) #elif defined(HAVE_GLIB)
char *tmp = g_utf8_casefold(src, -1); char *tmp = g_utf8_casefold(src, -1);
std::string result(tmp); std::string result(tmp);