lemmatizer: misc small improvements

This commit is contained in:
2026-03-02 09:30:01 +09:00
parent c06fff9e5a
commit 4a6fd41f31
5 changed files with 54 additions and 40 deletions

View File

@@ -38,9 +38,9 @@ class LemmatizationRule {
required String pattern,
required String? replacement,
required WordClass wordClass,
validChildClasses,
terminal = false,
lookAheadBehind = const [''],
List<WordClass>? validChildClasses,
bool terminal = false,
List<Pattern> lookAheadBehind = const [''],
LemmatizationRuleType type = LemmatizationRuleType.suffix,
}) : this(
name: name,
@@ -71,6 +71,7 @@ class AllomorphPattern {
this.lookAheadBehind = const [''],
});
/// Convert the [patterns] into regexes.
List<(String, Pattern)> get allPatternCombinations {
final combinations = <(String, Pattern)>[];
for (final l in lookAheadBehind) {
@@ -94,6 +95,7 @@ class AllomorphPattern {
return combinations;
}
/// Check whether an input string matches any of the [patterns].
bool matches(String word) {
for (final (_, p) in allPatternCombinations) {
if (p is String) {
@@ -111,6 +113,9 @@ class AllomorphPattern {
return false;
}
/// Apply the replacement for this pattern.
///
/// If none of the [patterns] apply, this function returns `null`.
List<String>? apply(String word) {
for (final (affix, p) in allPatternCombinations) {
switch ((type, p is RegExp)) {
@@ -203,9 +208,10 @@ List<Lemmatized> _lemmatize(LemmatizationRule parentRule, String word) {
final filteredLemmatizationRules = parentRule.validChildClasses == null
? lemmatizationRules
: lemmatizationRules.where(
(r) => parentRule.validChildClasses!.contains(r.wordClass),
);
: [
for (final wordClass in parentRule.validChildClasses!)
...lemmatizationRulesByWordClass[wordClass]!,
];
for (final rule in filteredLemmatizationRules) {
if (rule.matches(word)) {

View File

@@ -3,8 +3,15 @@ import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart';
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
List<LemmatizationRule> lemmatizationRules = [
final List<LemmatizationRule> lemmatizationRules = List.unmodifiable([
...ichidanVerbLemmatizationRules,
...godanVerbLemmatizationRules,
...iAdjectiveLemmatizationRules,
];
]);
/// Lemmatization rules grouped by the [WordClass] they are registered under.
///
/// Lets callers fetch the rule list for a word class with a single map lookup
/// rather than filtering the combined rule list by `wordClass`.
///
/// The map is wrapped in [Map.unmodifiable], so entries cannot be added,
/// removed or replaced at runtime. Only the word classes listed below have an
/// entry; a lookup for any other class yields `null`.
///
/// NOTE(review): `_lemmatize` indexes this map with a `!` null assertion, so
/// any word class that can appear in a rule's `validChildClasses` needs an
/// entry here — confirm when adding new word classes.
final Map<WordClass, List<LemmatizationRule>> lemmatizationRulesByWordClass =
Map.unmodifiable({
WordClass.ichidanVerb: ichidanVerbLemmatizationRules,
WordClass.iAdjective: iAdjectiveLemmatizationRules,
WordClass.godanVerb: godanVerbLemmatizationRules,
});

View File

@@ -479,7 +479,7 @@ final LemmatizationRule godanVerbNegativePastDesire = LemmatizationRule(
wordClass: WordClass.godanVerb,
);
final List<LemmatizationRule> godanVerbLemmatizationRules = [
final List<LemmatizationRule> godanVerbLemmatizationRules = List.unmodifiable([
godanVerbBase,
godanVerbNegative,
godanVerbPast,
@@ -506,4 +506,4 @@ final List<LemmatizationRule> godanVerbLemmatizationRules = [
godanVerbNegativeDesire,
godanVerbPastDesire,
godanVerbNegativePastDesire,
];
]);

View File

@@ -65,7 +65,7 @@ final LemmatizationRule iAdjectiveContinuative = LemmatizationRule.simple(
wordClass: WordClass.iAdjective,
);
final List<LemmatizationRule> iAdjectiveLemmatizationRules = [
final List<LemmatizationRule> iAdjectiveLemmatizationRules = List.unmodifiable([
iAdjectiveBase,
iAdjectiveNegative,
iAdjectivePast,
@@ -74,4 +74,4 @@ final List<LemmatizationRule> iAdjectiveLemmatizationRules = [
iAdjectiveConditional,
iAdjectiveVolitional,
iAdjectiveContinuative,
];
]);

View File

@@ -267,31 +267,32 @@ final LemmatizationRule ichidanVerbNegativePastDesire =
wordClass: WordClass.ichidanVerb,
);
final List<LemmatizationRule> ichidanVerbLemmatizationRules = [
ichidanVerbBase,
ichidanVerbNegative,
ichidanVerbPast,
ichidanVerbTe,
ichidanVerbTeiru,
ichidanVerbTeita,
ichidanVerbConditional,
ichidanVerbVolitional,
ichidanVerbPotential,
ichidanVerbPassive,
ichidanVerbCausative,
ichidanVerbCausativePassive,
ichidanVerbImperative,
ichidanVerbNegativePast,
ichidanVerbNegativeTe,
ichidanVerbNegativeConditional,
ichidanVerbNegativeVolitional,
ichidanVerbNegativePotential,
ichidanVerbNegativePassive,
ichidanVerbNegativeCausative,
ichidanVerbNegativeCausativePassive,
ichidanVerbNegativeImperative,
ichidanVerbDesire,
ichidanVerbNegativeDesire,
ichidanVerbPastDesire,
ichidanVerbNegativePastDesire,
];
/// The lemmatization rules for ichidan verbs, in the order listed below.
///
/// The list is built with [List.unmodifiable], so attempts to add, remove or
/// reorder rules at runtime throw instead of silently changing the rule set.
final List<LemmatizationRule> ichidanVerbLemmatizationRules =
List.unmodifiable([
ichidanVerbBase,
ichidanVerbNegative,
ichidanVerbPast,
ichidanVerbTe,
ichidanVerbTeiru,
ichidanVerbTeita,
ichidanVerbConditional,
ichidanVerbVolitional,
ichidanVerbPotential,
ichidanVerbPassive,
ichidanVerbCausative,
ichidanVerbCausativePassive,
ichidanVerbImperative,
ichidanVerbNegativePast,
ichidanVerbNegativeTe,
ichidanVerbNegativeConditional,
ichidanVerbNegativeVolitional,
ichidanVerbNegativePotential,
ichidanVerbNegativePassive,
ichidanVerbNegativeCausative,
ichidanVerbNegativeCausativePassive,
ichidanVerbNegativeImperative,
ichidanVerbDesire,
ichidanVerbNegativeDesire,
ichidanVerbPastDesire,
ichidanVerbNegativePastDesire,
]);