From 4a6fd41f3138b5517efd5cb22ca9b07160e7e64b Mon Sep 17 00:00:00 2001 From: h7x4 Date: Mon, 2 Mar 2026 09:30:01 +0900 Subject: [PATCH] lemmatizer: misc small improvements --- lib/util/lemmatizer/lemmatizer.dart | 18 ++++--- lib/util/lemmatizer/rules.dart | 11 +++- lib/util/lemmatizer/rules/godan_verbs.dart | 4 +- lib/util/lemmatizer/rules/i_adjectives.dart | 4 +- lib/util/lemmatizer/rules/ichidan_verbs.dart | 57 ++++++++++---------- 5 files changed, 54 insertions(+), 40 deletions(-) diff --git a/lib/util/lemmatizer/lemmatizer.dart b/lib/util/lemmatizer/lemmatizer.dart index 7e68d90..1fbe80a 100644 --- a/lib/util/lemmatizer/lemmatizer.dart +++ b/lib/util/lemmatizer/lemmatizer.dart @@ -38,9 +38,9 @@ class LemmatizationRule { required String pattern, required String? replacement, required WordClass wordClass, - validChildClasses, - terminal = false, - lookAheadBehind = const [''], + List? validChildClasses, + bool terminal = false, + List lookAheadBehind = const [''], LemmatizationRuleType type = LemmatizationRuleType.suffix, }) : this( name: name, @@ -71,6 +71,7 @@ class AllomorphPattern { this.lookAheadBehind = const [''], }); + /// Convert the [patterns] into regexes List<(String, Pattern)> get allPatternCombinations { final combinations = <(String, Pattern)>[]; for (final l in lookAheadBehind) { @@ -94,6 +95,7 @@ class AllomorphPattern { return combinations; } + /// Check whether an input string matches any of the [patterns] bool matches(String word) { for (final (_, p) in allPatternCombinations) { if (p is String) { @@ -111,6 +113,9 @@ class AllomorphPattern { return false; } + /// Apply the replacement for this pattern. + /// + /// If none of the [patterns] apply, this function returns `null`. List? apply(String word) { for (final (affix, p) in allPatternCombinations) { switch ((type, p is RegExp)) { @@ -203,9 +208,10 @@ List _lemmatize(LemmatizationRule parentRule, String word) { final filteredLemmatizationRules = parentRule.validChildClasses == null ? lemmatizationRules - : lemmatizationRules.where( - (r) => parentRule.validChildClasses!.contains(r.wordClass), - ); + : [ + for (final wordClass in parentRule.validChildClasses!) + ...lemmatizationRulesByWordClass[wordClass]!, + ]; for (final rule in filteredLemmatizationRules) { if (rule.matches(word)) { diff --git a/lib/util/lemmatizer/rules.dart b/lib/util/lemmatizer/rules.dart index 58331ea..d33106a 100644 --- a/lib/util/lemmatizer/rules.dart +++ b/lib/util/lemmatizer/rules.dart @@ -3,8 +3,15 @@ import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart'; import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart'; import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart'; -List lemmatizationRules = [ +final List lemmatizationRules = List.unmodifiable([ ...ichidanVerbLemmatizationRules, ...godanVerbLemmatizationRules, ...iAdjectiveLemmatizationRules, -]; +]); + +final Map> lemmatizationRulesByWordClass = + Map.unmodifiable({ + WordClass.ichidanVerb: ichidanVerbLemmatizationRules, + WordClass.iAdjective: iAdjectiveLemmatizationRules, + WordClass.godanVerb: godanVerbLemmatizationRules, + }); diff --git a/lib/util/lemmatizer/rules/godan_verbs.dart b/lib/util/lemmatizer/rules/godan_verbs.dart index e7c1365..4dc3ec1 100644 --- a/lib/util/lemmatizer/rules/godan_verbs.dart +++ b/lib/util/lemmatizer/rules/godan_verbs.dart @@ -479,7 +479,7 @@ final LemmatizationRule godanVerbNegativePastDesire = LemmatizationRule( wordClass: WordClass.godanVerb, ); -final List godanVerbLemmatizationRules = [ +final List godanVerbLemmatizationRules = List.unmodifiable([ godanVerbBase, godanVerbNegative, godanVerbPast, @@ -506,4 +506,4 @@ final List godanVerbLemmatizationRules = [ godanVerbNegativeDesire, godanVerbPastDesire, godanVerbNegativePastDesire, -]; +]); diff --git a/lib/util/lemmatizer/rules/i_adjectives.dart b/lib/util/lemmatizer/rules/i_adjectives.dart index 8ab2947..3165153 100644 --- a/lib/util/lemmatizer/rules/i_adjectives.dart +++ b/lib/util/lemmatizer/rules/i_adjectives.dart @@ -65,7 +65,7 @@ final LemmatizationRule iAdjectiveContinuative = LemmatizationRule.simple( wordClass: WordClass.iAdjective, ); -final List iAdjectiveLemmatizationRules = [ +final List iAdjectiveLemmatizationRules = List.unmodifiable([ iAdjectiveBase, iAdjectiveNegative, iAdjectivePast, @@ -74,4 +74,4 @@ final List iAdjectiveLemmatizationRules = [ iAdjectiveConditional, iAdjectiveVolitional, iAdjectiveContinuative, -]; +]); diff --git a/lib/util/lemmatizer/rules/ichidan_verbs.dart b/lib/util/lemmatizer/rules/ichidan_verbs.dart index c798170..7d4f7ab 100644 --- a/lib/util/lemmatizer/rules/ichidan_verbs.dart +++ b/lib/util/lemmatizer/rules/ichidan_verbs.dart @@ -267,31 +267,32 @@ final LemmatizationRule ichidanVerbNegativePastDesire = wordClass: WordClass.ichidanVerb, ); -final List ichidanVerbLemmatizationRules = [ - ichidanVerbBase, - ichidanVerbNegative, - ichidanVerbPast, - ichidanVerbTe, - ichidanVerbTeiru, - ichidanVerbTeita, - ichidanVerbConditional, - ichidanVerbVolitional, - ichidanVerbPotential, - ichidanVerbPassive, - ichidanVerbCausative, - ichidanVerbCausativePassive, - ichidanVerbImperative, - ichidanVerbNegativePast, - ichidanVerbNegativeTe, - ichidanVerbNegativeConditional, - ichidanVerbNegativeVolitional, - ichidanVerbNegativePotential, - ichidanVerbNegativePassive, - ichidanVerbNegativeCausative, - ichidanVerbNegativeCausativePassive, - ichidanVerbNegativeImperative, - ichidanVerbDesire, - ichidanVerbNegativeDesire, - ichidanVerbPastDesire, - ichidanVerbNegativePastDesire, -]; +final List ichidanVerbLemmatizationRules = + List.unmodifiable([ + ichidanVerbBase, + ichidanVerbNegative, + ichidanVerbPast, + ichidanVerbTe, + ichidanVerbTeiru, + ichidanVerbTeita, + ichidanVerbConditional, + ichidanVerbVolitional, + ichidanVerbPotential, + ichidanVerbPassive, + ichidanVerbCausative, + ichidanVerbCausativePassive, + ichidanVerbImperative, + ichidanVerbNegativePast, + ichidanVerbNegativeTe, + ichidanVerbNegativeConditional, + ichidanVerbNegativeVolitional, + ichidanVerbNegativePotential, + ichidanVerbNegativePassive, + ichidanVerbNegativeCausative, + ichidanVerbNegativeCausativePassive, + ichidanVerbNegativeImperative, + ichidanVerbDesire, + ichidanVerbNegativeDesire, + ichidanVerbPastDesire, + ichidanVerbNegativePastDesire, + ]);