lib/icu/CaseFold: rename to IcuCanonicalize() and add flag parameter

Prepare for adding more transformations.
This commit is contained in:
Max Kellermann 2022-11-15 17:09:02 +01:00
parent 852df2239e
commit 8b72cb64b2
4 changed files with 30 additions and 24 deletions

View File

@ -17,10 +17,10 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/ */
#include "CaseFold.hxx" #include "Canonicalize.hxx"
#include "config.h" #include "config.h"
#ifdef HAVE_ICU_CASE_FOLD #ifdef HAVE_ICU_CANONICALIZE
#include "util/AllocatedString.hxx" #include "util/AllocatedString.hxx"
@ -32,18 +32,19 @@
#endif #endif
AllocatedString AllocatedString
IcuCaseFold(std::string_view src) noexcept IcuCanonicalize(std::string_view src, bool fold_case) noexcept
try { try {
#ifdef HAVE_ICU #ifdef HAVE_ICU
auto u = UCharFromUTF8(src); auto u = UCharFromUTF8(src);
if (u.data() == nullptr) if (u.data() == nullptr)
return {src}; return {src};
auto folded = IcuFoldCase(ToStringView(std::span{u})); if (fold_case)
if (folded == nullptr) if (auto folded = IcuFoldCase(ToStringView(std::span{u}));
return {src}; folded != nullptr)
u = std::move(folded);
return UCharToUTF8(ToStringView(std::span{folded})); return UCharToUTF8(ToStringView(std::span{u}));
#else #else
#error not implemented #error not implemented
#endif #endif
@ -51,4 +52,4 @@ try {
return {src}; return {src};
} }
#endif /* HAVE_ICU_CASE_FOLD */ #endif /* HAVE_ICU_CANONICALIZE */

View File

@ -17,21 +17,25 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/ */
#ifndef MPD_ICU_CASE_FOLD_HXX #pragma once
#define MPD_ICU_CASE_FOLD_HXX
#include "config.h" #include "config.h"
#ifdef HAVE_ICU #ifdef HAVE_ICU
#define HAVE_ICU_CASE_FOLD #define HAVE_ICU_CANONICALIZE
#include <string_view> #include <string_view>
class AllocatedString; class AllocatedString;
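/**
* Transform the given UTF-8 string to a "canonical" form to allow
* fuzzy string comparisons. The following transformations are
* available (only if ICU is being used):
*
* - case folding (optional; applied only if the fold_case
*   parameter is true)
*
* If the input cannot be converted from UTF-8, a copy of the
* unmodified input string is returned.
*/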
AllocatedString AllocatedString
IcuCaseFold(std::string_view src) noexcept; IcuCanonicalize(std::string_view src, bool fold_case) noexcept;
#endif
#endif #endif

View File

@ -18,7 +18,7 @@
*/ */
#include "Compare.hxx" #include "Compare.hxx"
#include "CaseFold.hxx" #include "Canonicalize.hxx"
#include "util/StringAPI.hxx" #include "util/StringAPI.hxx"
#include "util/StringCompare.hxx" #include "util/StringCompare.hxx"
#include "config.h" #include "config.h"
@ -28,10 +28,10 @@
#include <windows.h> #include <windows.h>
#endif #endif
#ifdef HAVE_ICU_CASE_FOLD #ifdef HAVE_ICU_CANONICALIZE
IcuCompare::IcuCompare(std::string_view _needle) noexcept IcuCompare::IcuCompare(std::string_view _needle) noexcept
:needle(IcuCaseFold(_needle)) {} :needle(IcuCanonicalize(_needle, true)) {}
#elif defined(_WIN32) #elif defined(_WIN32)
@ -54,8 +54,8 @@ IcuCompare::IcuCompare(std::string_view _needle) noexcept
bool bool
IcuCompare::operator==(const char *haystack) const noexcept IcuCompare::operator==(const char *haystack) const noexcept
{ {
#ifdef HAVE_ICU_CASE_FOLD #ifdef HAVE_ICU_CANONICALIZE
return StringIsEqual(IcuCaseFold(haystack).c_str(), needle.c_str()); return StringIsEqual(IcuCanonicalize(haystack, true).c_str(), needle.c_str());
#elif defined(_WIN32) #elif defined(_WIN32)
if (needle == nullptr) if (needle == nullptr)
/* the MultiByteToWideChar() call in the constructor /* the MultiByteToWideChar() call in the constructor
@ -80,8 +80,8 @@ IcuCompare::operator==(const char *haystack) const noexcept
bool bool
IcuCompare::IsIn(const char *haystack) const noexcept IcuCompare::IsIn(const char *haystack) const noexcept
{ {
#ifdef HAVE_ICU_CASE_FOLD #ifdef HAVE_ICU_CANONICALIZE
return StringFind(IcuCaseFold(haystack).c_str(), return StringFind(IcuCanonicalize(haystack, true).c_str(),
needle.c_str()) != nullptr; needle.c_str()) != nullptr;
#elif defined(_WIN32) #elif defined(_WIN32)
if (needle == nullptr) if (needle == nullptr)
@ -117,8 +117,9 @@ IcuCompare::IsIn(const char *haystack) const noexcept
bool bool
IcuCompare::StartsWith(const char *haystack) const noexcept IcuCompare::StartsWith(const char *haystack) const noexcept
{ {
#ifdef HAVE_ICU_CASE_FOLD #ifdef HAVE_ICU_CANONICALIZE
return StringStartsWith(IcuCaseFold(haystack).c_str(), needle); return StringStartsWith(IcuCanonicalize(haystack, true).c_str(),
needle);
#elif defined(_WIN32) #elif defined(_WIN32)
if (needle == nullptr) if (needle == nullptr)
/* the MultiByteToWideChar() call in the constructor /* the MultiByteToWideChar() call in the constructor

View File

@ -2,7 +2,7 @@ icu_dep = dependency('icu-i18n', version: '>= 50', required: get_option('icu'))
conf.set('HAVE_ICU', icu_dep.found()) conf.set('HAVE_ICU', icu_dep.found())
icu_sources = [ icu_sources = [
'CaseFold.cxx', 'Canonicalize.cxx',
'Compare.cxx', 'Compare.cxx',
'Collate.cxx', 'Collate.cxx',
'Converter.cxx', 'Converter.cxx',