diff --git a/doc/protocol.rst b/doc/protocol.rst
index 8fde63888..aa2cbb272 100644
--- a/doc/protocol.rst
+++ b/doc/protocol.rst
@@ -237,6 +237,7 @@ of:
 The :command:`find` commands are case sensitive, while
 :command:`search` and related commands ignore case. The latter also
 applies `Unicode normalization `__
+and converts all punctuation to ASCII equivalents
 if MPD was compiled with `ICU `__ support.
 
 Prior to MPD 0.21, the syntax looked like this::
diff --git a/src/lib/icu/Canonicalize.cxx b/src/lib/icu/Canonicalize.cxx
index 08528c592..aca407584 100644
--- a/src/lib/icu/Canonicalize.cxx
+++ b/src/lib/icu/Canonicalize.cxx
@@ -26,15 +26,48 @@
 
 #ifdef HAVE_ICU
 #include "Normalize.hxx"
+#include "Transliterator.hxx"
 #include "Util.hxx"
 #include "util/AllocatedArray.hxx"
 #include "util/SpanCast.hxx"
 #endif
 
+#ifdef HAVE_ICU
+
+using std::string_view_literals::operator""sv;
+
+static IcuTransliterator *transliterator; /* allocated by IcuCanonicalizeInit(), deleted and reset to nullptr by IcuCanonicalizeFinish() */
+
+void
+IcuCanonicalizeInit()
+{
+	assert(transliterator == nullptr); /* must not be initialized twice without IcuCanonicalizeFinish() in between */
+
+	const auto id =
+		/* transliterator ID: convert all punctuation to ASCII equivalents */
+		"[:Punctuation:] Latin-ASCII;"sv;
+
+	transliterator = new IcuTransliterator(ToStringView(std::span{UCharFromUTF8(id)}), /* the UTF-8 ID is converted to UChar first */
+					       {});
+}
+
+void
+IcuCanonicalizeFinish() noexcept
+{
+	assert(transliterator != nullptr); /* requires a prior IcuCanonicalizeInit() */
+
+	delete transliterator;
+	transliterator = nullptr; /* allows IcuCanonicalizeInit() to be called again */
+}
+
+#endif
+
 AllocatedString
 IcuCanonicalize(std::string_view src, bool fold_case) noexcept
 try {
 #ifdef HAVE_ICU
+	assert(transliterator != nullptr); /* IcuCanonicalizeInit() must have been called */
+
 	auto u = UCharFromUTF8(src);
 	if (u.data() == nullptr)
 		return {src};
@@ -45,6 +78,10 @@ try {
 	    n != nullptr)
 		u = std::move(n);
 
+	if (auto t = transliterator->Transliterate(ToStringView(std::span{u})); /* runs on the result of the normalization step above */
+	    t != nullptr) /* a nullptr result leaves u unchanged */
+		u = std::move(t);
+
 	return UCharToUTF8(ToStringView(std::span{u}));
 #else
 #error not implemented
diff --git a/src/lib/icu/Canonicalize.hxx b/src/lib/icu/Canonicalize.hxx
index d2d33c004..33c0f8202 100644
--- a/src/lib/icu/Canonicalize.hxx
+++ b/src/lib/icu/Canonicalize.hxx
@@ -28,6 +28,15 @@
 
 class AllocatedString;
 
+/**
+ * Create the global transliterator used by IcuCanonicalize(). Throws on error.
+ */
+void
+IcuCanonicalizeInit();
+
+void
+IcuCanonicalizeFinish() noexcept; /* deletes the transliterator created by IcuCanonicalizeInit() */
+
 /**
  * Transform the given string to "canonical" form to allow fuzzy
  * string comparisons. The full set of features (if ICU is being
diff --git a/src/lib/icu/Init.cxx b/src/lib/icu/Init.cxx
index 305b7a39a..dda779597 100644
--- a/src/lib/icu/Init.cxx
+++ b/src/lib/icu/Init.cxx
@@ -19,6 +19,7 @@
 
 #include "Init.hxx"
 #include "Collate.hxx"
+#include "Canonicalize.hxx"
 #include "util/RuntimeError.hxx"
 
 #include
@@ -33,11 +34,13 @@ IcuInit()
 				 u_errorName(code));
 
 	IcuCollateInit();
+	IcuCanonicalizeInit(); /* may throw */
 }
 
 void
 IcuFinish() noexcept
 {
+	IcuCanonicalizeFinish(); /* torn down in the reverse order of IcuInit() */
 	IcuCollateFinish();
 
 	u_cleanup();
diff --git a/test/TestStringFilter.cxx b/test/TestStringFilter.cxx
index 4b526759b..a09bf1884 100644
--- a/test/TestStringFilter.cxx
+++ b/test/TestStringFilter.cxx
@@ -143,6 +143,18 @@ TEST_F(StringFilterTest, Normalize)
 
 #endif
 
+#ifdef HAVE_ICU
+
+TEST_F(StringFilterTest, Transliterate)
+{
+	const StringFilter f{"'", true, false, false, false}; /* the needle is a plain ASCII apostrophe */
+
+	EXPECT_TRUE(f.Match("’")); /* U+2019 (right single quotation mark) is expected to match the apostrophe */
+	EXPECT_FALSE(f.Match("\"")); /* an ASCII double quote must not match the apostrophe */
+}
+
+#endif
+
 TEST_F(StringFilterTest, FoldCase)
 {
 	const StringFilter f{"nëedlé", true, false, false, false};