From d7f545721bec93da1cdfb6abe096c83189dd8802 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 15 Nov 2022 17:44:54 +0100 Subject: [PATCH] lib/icu/Canonicalize: use unorm2_normalize() instead of u_strFoldCase() unorm2_normalize() can case-fold as well, plus it applies Unicode normalization which MPD should do for proper string comparisons. --- NEWS | 1 + doc/protocol.rst | 4 +++- src/lib/icu/Canonicalize.cxx | 11 ++++++----- test/TestStringFilter.cxx | 38 ++++++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index 6ea828dfc..b7b96af42 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,7 @@ ver 0.24 (not yet released) - limit "player" idle events to the current partition - operator "starts_with" - show PCRE support in "config" response + - apply Unicode normalization to case-insensitive filter expressions * archive - add option to disable archive plugins in mpd.conf * input diff --git a/doc/protocol.rst b/doc/protocol.rst index 9d8125698..8fde63888 100644 --- a/doc/protocol.rst +++ b/doc/protocol.rst @@ -235,7 +235,9 @@ of: (album == 'BAR'))` The :command:`find` commands are case sensitive, while -:command:`search` and related commands ignore case. +:command:`search` and related commands ignore case. The latter also +applies `Unicode normalization <https://unicode.org/reports/tr15/>`__ +if MPD was compiled with `ICU <https://icu.unicode.org/>`__ support. 
Prior to MPD 0.21, the syntax looked like this:: diff --git a/src/lib/icu/Canonicalize.cxx b/src/lib/icu/Canonicalize.cxx index 8f97cb9f5..08528c592 100644 --- a/src/lib/icu/Canonicalize.cxx +++ b/src/lib/icu/Canonicalize.cxx @@ -25,7 +25,7 @@ #include "util/AllocatedString.hxx" #ifdef HAVE_ICU -#include "FoldCase.hxx" +#include "Normalize.hxx" #include "Util.hxx" #include "util/AllocatedArray.hxx" #include "util/SpanCast.hxx" @@ -39,10 +39,11 @@ try { if (u.data() == nullptr) return {src}; - if (fold_case) - if (auto folded = IcuFoldCase(ToStringView(std::span{u})); - folded != nullptr) - u = std::move(folded); + if (auto n = fold_case + ? IcuNormalizeCaseFold(ToStringView(std::span{u})) + : IcuNormalize(ToStringView(std::span{u})); + n != nullptr) + u = std::move(n); return UCharToUTF8(ToStringView(std::span{u})); #else diff --git a/test/TestStringFilter.cxx b/test/TestStringFilter.cxx index 80ac38122..4b526759b 100644 --- a/test/TestStringFilter.cxx +++ b/test/TestStringFilter.cxx @@ -97,6 +97,10 @@ TEST_F(StringFilterTest, Latin) const StringFilter f{"nëedlé", false, false, false, false}; EXPECT_TRUE(f.Match("nëedlé")); +#if defined(HAVE_ICU) || defined(_WIN32) + EXPECT_TRUE(f.Match("nëedl\u00e9")); + // TODO EXPECT_TRUE(f.Match("nëedl\u0065\u0301")); +#endif EXPECT_FALSE(f.Match("NËEDLÉ")); EXPECT_FALSE(f.Match("needlé")); EXPECT_FALSE(f.Match("néedlé")); @@ -109,13 +113,47 @@ TEST_F(StringFilterTest, Latin) EXPECT_FALSE(f.Match("FOOnëedleBAR")); } +#if defined(HAVE_ICU) || defined(_WIN32) + +TEST_F(StringFilterTest, Normalize) +{ + const StringFilter f{"1①H", true, false, false, false}; + + EXPECT_TRUE(f.Match("1①H")); + EXPECT_TRUE(f.Match("¹₁H")); + EXPECT_TRUE(f.Match("①1ℌ")); + EXPECT_TRUE(f.Match("①1ℍ")); + EXPECT_FALSE(f.Match("21H")); + +#ifndef _WIN32 + // fails with Windows CompareStringEx() + EXPECT_TRUE(StringFilter("dž", true, false, false, false).Match("dž")); +#endif + + EXPECT_TRUE(StringFilter("\u212b", true, false, false, 
false).Match("\u0041\u030a")); + EXPECT_TRUE(StringFilter("\u212b", true, false, false, false).Match("\u00c5")); + + EXPECT_TRUE(StringFilter("\u1e69", true, false, false, false).Match("\u0073\u0323\u0307")); + +#ifndef _WIN32 + // fails with Windows CompareStringEx() + EXPECT_TRUE(StringFilter("\u1e69", true, false, false, false).Match("\u0073\u0307\u0323")); +#endif +} + +#endif + TEST_F(StringFilterTest, FoldCase) { const StringFilter f{"nëedlé", true, false, false, false}; EXPECT_TRUE(f.Match("nëedlé")); #if defined(HAVE_ICU) || defined(_WIN32) + EXPECT_TRUE(f.Match("nëedl\u00e9")); + EXPECT_TRUE(f.Match("nëedl\u0065\u0301")); EXPECT_TRUE(f.Match("NËEDLÉ")); + EXPECT_TRUE(f.Match("NËEDL\u00c9")); + EXPECT_TRUE(f.Match("NËEDL\u0045\u0301")); #endif EXPECT_FALSE(f.Match("needlé")); EXPECT_FALSE(f.Match("néedlé"));