lib/icu/Canonicalize: use unorm2_normalize() instead of u_strFoldCase()

unorm2_normalize() can case-fold as well, plus it applies Unicode
normalization which MPD should do for proper string comparisons.
This commit is contained in:
Max Kellermann 2022-11-15 17:44:54 +01:00
parent 8b72cb64b2
commit d7f545721b
4 changed files with 48 additions and 6 deletions

1
NEWS
View File

@ -6,6 +6,7 @@ ver 0.24 (not yet released)
- limit "player" idle events to the current partition
- operator "starts_with"
- show PCRE support in "config" response
- apply Unicode normalization to case-insensitive filter expressions
* archive
- add option to disable archive plugins in mpd.conf
* input

View File

@ -235,7 +235,9 @@ of:
(album == 'BAR'))`
The :command:`find` commands are case sensitive, while
:command:`search` and related commands ignore case.
:command:`search` and related commands ignore case. The latter also
applies `Unicode normalization <https://unicode.org/reports/tr15/>`__
if MPD was compiled with `ICU <https://icu.unicode.org/>`__ support.
Prior to MPD 0.21, the syntax looked like this::

View File

@ -25,7 +25,7 @@
#include "util/AllocatedString.hxx"
#ifdef HAVE_ICU
#include "FoldCase.hxx"
#include "Normalize.hxx"
#include "Util.hxx"
#include "util/AllocatedArray.hxx"
#include "util/SpanCast.hxx"
@ -39,10 +39,11 @@ try {
if (u.data() == nullptr)
return {src};
if (fold_case)
if (auto folded = IcuFoldCase(ToStringView(std::span{u}));
folded != nullptr)
u = std::move(folded);
if (auto n = fold_case
? IcuNormalizeCaseFold(ToStringView(std::span{u}))
: IcuNormalize(ToStringView(std::span{u}));
n != nullptr)
u = std::move(n);
return UCharToUTF8(ToStringView(std::span{u}));
#else

View File

@ -97,6 +97,10 @@ TEST_F(StringFilterTest, Latin)
const StringFilter f{"nëedlé", false, false, false, false};
EXPECT_TRUE(f.Match("nëedlé"));
#if defined(HAVE_ICU) || defined(_WIN32)
EXPECT_TRUE(f.Match("nëedl\u00e9"));
// TODO EXPECT_TRUE(f.Match("nëedl\u0065\u0301"));
#endif
EXPECT_FALSE(f.Match("NËEDLÉ"));
EXPECT_FALSE(f.Match("needlé"));
EXPECT_FALSE(f.Match("néedlé"));
@ -109,13 +113,47 @@ TEST_F(StringFilterTest, Latin)
EXPECT_FALSE(f.Match("FOOnëedleBAR"));
}
#if defined(HAVE_ICU) || defined(_WIN32)
// Checks that a StringFilter built with its second constructor argument
// set to true (the same setting the FoldCase test uses to get
// case-insensitive matching) also treats Unicode-equivalent spellings
// of a string as equal, not just byte-identical ones.
TEST_F(StringFilterTest, Normalize)
{
// The needle mixes a plain digit, CIRCLED DIGIT ONE and a Latin letter.
const StringFilter f{"1①H", true, false, false, false};
// Identity: the needle must match itself.
EXPECT_TRUE(f.Match("1①H"));
// SUPERSCRIPT ONE and SUBSCRIPT ONE are compatibility variants of "1".
EXPECT_TRUE(f.Match("¹₁H"));
// NOTE(review): the next two assertions are identical as shown here and
// carry no "H" counterpart; the literals may have lost characters when
// this page was extracted -- confirm against the upstream test file.
EXPECT_TRUE(f.Match("①1"));
EXPECT_TRUE(f.Match("①1"));
// A different leading digit must not be normalized into a match.
EXPECT_FALSE(f.Match("21H"));
#ifndef _WIN32
// fails with Windows CompareStringEx()
// NOTE(review): the haystack literal below is empty as shown here;
// presumably it originally held an equivalent spelling of the needle --
// confirm against the upstream test file.
EXPECT_TRUE(StringFilter("dž", true, false, false, false).Match(""));
#endif
// U+212B ANGSTROM SIGN versus "A" followed by U+030A COMBINING RING ABOVE
// (decomposed spelling).
EXPECT_TRUE(StringFilter("\u212b", true, false, false, false).Match("\u0041\u030a"));
// U+212B ANGSTROM SIGN versus U+00C5 (precomposed A WITH RING ABOVE).
EXPECT_TRUE(StringFilter("\u212b", true, false, false, false).Match("\u00c5"));
// U+1E69 versus "s" + U+0323 (dot below) + U+0307 (dot above).
EXPECT_TRUE(StringFilter("\u1e69", true, false, false, false).Match("\u0073\u0323\u0307"));
#ifndef _WIN32
// fails with Windows CompareStringEx()
// Same combining marks as above but in the opposite order; matching
// requires the marks to be reordered before comparison.
EXPECT_TRUE(StringFilter("\u1e69", true, false, false, false).Match("\u0073\u0307\u0323"));
#endif
}
#endif
TEST_F(StringFilterTest, FoldCase)
{
const StringFilter f{"nëedlé", true, false, false, false};
EXPECT_TRUE(f.Match("nëedlé"));
#if defined(HAVE_ICU) || defined(_WIN32)
EXPECT_TRUE(f.Match("nëedl\u00e9"));
EXPECT_TRUE(f.Match("nëedl\u0065\u0301"));
EXPECT_TRUE(f.Match("NËEDLÉ"));
EXPECT_TRUE(f.Match("NËEDL\u00c9"));
EXPECT_TRUE(f.Match("NËEDL\u0045\u0301"));
#endif
EXPECT_FALSE(f.Match("needlé"));
EXPECT_FALSE(f.Match("néedlé"));