lemmatizer: misc small improvements
This commit is contained in:
@@ -38,9 +38,9 @@ class LemmatizationRule {
|
||||
required String pattern,
|
||||
required String? replacement,
|
||||
required WordClass wordClass,
|
||||
validChildClasses,
|
||||
terminal = false,
|
||||
lookAheadBehind = const [''],
|
||||
List<WordClass>? validChildClasses,
|
||||
bool terminal = false,
|
||||
List<Pattern> lookAheadBehind = const [''],
|
||||
LemmatizationRuleType type = LemmatizationRuleType.suffix,
|
||||
}) : this(
|
||||
name: name,
|
||||
@@ -71,6 +71,7 @@ class AllomorphPattern {
|
||||
this.lookAheadBehind = const [''],
|
||||
});
|
||||
|
||||
/// Convert the [patterns] into regexes
|
||||
List<(String, Pattern)> get allPatternCombinations {
|
||||
final combinations = <(String, Pattern)>[];
|
||||
for (final l in lookAheadBehind) {
|
||||
@@ -94,6 +95,7 @@ class AllomorphPattern {
|
||||
return combinations;
|
||||
}
|
||||
|
||||
/// Check whether an input string matches any of the [patterns]
|
||||
bool matches(String word) {
|
||||
for (final (_, p) in allPatternCombinations) {
|
||||
if (p is String) {
|
||||
@@ -111,6 +113,9 @@ class AllomorphPattern {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Apply the replacement for this pattern.
|
||||
///
|
||||
/// If none of the [patterns] apply, this function returns `null`.
|
||||
List<String>? apply(String word) {
|
||||
for (final (affix, p) in allPatternCombinations) {
|
||||
switch ((type, p is RegExp)) {
|
||||
@@ -203,9 +208,10 @@ List<Lemmatized> _lemmatize(LemmatizationRule parentRule, String word) {
|
||||
|
||||
final filteredLemmatizationRules = parentRule.validChildClasses == null
|
||||
? lemmatizationRules
|
||||
: lemmatizationRules.where(
|
||||
(r) => parentRule.validChildClasses!.contains(r.wordClass),
|
||||
);
|
||||
: [
|
||||
for (final wordClass in parentRule.validChildClasses!)
|
||||
...lemmatizationRulesByWordClass[wordClass]!,
|
||||
];
|
||||
|
||||
for (final rule in filteredLemmatizationRules) {
|
||||
if (rule.matches(word)) {
|
||||
|
||||
@@ -3,8 +3,15 @@ import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
|
||||
|
||||
List<LemmatizationRule> lemmatizationRules = [
|
||||
final List<LemmatizationRule> lemmatizationRules = List.unmodifiable([
|
||||
...ichidanVerbLemmatizationRules,
|
||||
...godanVerbLemmatizationRules,
|
||||
...iAdjectiveLemmatizationRules,
|
||||
];
|
||||
]);
|
||||
|
||||
final Map<WordClass, List<LemmatizationRule>> lemmatizationRulesByWordClass =
|
||||
Map.unmodifiable({
|
||||
WordClass.ichidanVerb: ichidanVerbLemmatizationRules,
|
||||
WordClass.iAdjective: iAdjectiveLemmatizationRules,
|
||||
WordClass.godanVerb: godanVerbLemmatizationRules,
|
||||
});
|
||||
|
||||
@@ -479,7 +479,7 @@ final LemmatizationRule godanVerbNegativePastDesire = LemmatizationRule(
|
||||
wordClass: WordClass.godanVerb,
|
||||
);
|
||||
|
||||
final List<LemmatizationRule> godanVerbLemmatizationRules = [
|
||||
final List<LemmatizationRule> godanVerbLemmatizationRules = List.unmodifiable([
|
||||
godanVerbBase,
|
||||
godanVerbNegative,
|
||||
godanVerbPast,
|
||||
@@ -506,4 +506,4 @@ final List<LemmatizationRule> godanVerbLemmatizationRules = [
|
||||
godanVerbNegativeDesire,
|
||||
godanVerbPastDesire,
|
||||
godanVerbNegativePastDesire,
|
||||
];
|
||||
]);
|
||||
|
||||
@@ -65,7 +65,7 @@ final LemmatizationRule iAdjectiveContinuative = LemmatizationRule.simple(
|
||||
wordClass: WordClass.iAdjective,
|
||||
);
|
||||
|
||||
final List<LemmatizationRule> iAdjectiveLemmatizationRules = [
|
||||
final List<LemmatizationRule> iAdjectiveLemmatizationRules = List.unmodifiable([
|
||||
iAdjectiveBase,
|
||||
iAdjectiveNegative,
|
||||
iAdjectivePast,
|
||||
@@ -74,4 +74,4 @@ final List<LemmatizationRule> iAdjectiveLemmatizationRules = [
|
||||
iAdjectiveConditional,
|
||||
iAdjectiveVolitional,
|
||||
iAdjectiveContinuative,
|
||||
];
|
||||
]);
|
||||
|
||||
@@ -267,31 +267,32 @@ final LemmatizationRule ichidanVerbNegativePastDesire =
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final List<LemmatizationRule> ichidanVerbLemmatizationRules = [
|
||||
ichidanVerbBase,
|
||||
ichidanVerbNegative,
|
||||
ichidanVerbPast,
|
||||
ichidanVerbTe,
|
||||
ichidanVerbTeiru,
|
||||
ichidanVerbTeita,
|
||||
ichidanVerbConditional,
|
||||
ichidanVerbVolitional,
|
||||
ichidanVerbPotential,
|
||||
ichidanVerbPassive,
|
||||
ichidanVerbCausative,
|
||||
ichidanVerbCausativePassive,
|
||||
ichidanVerbImperative,
|
||||
ichidanVerbNegativePast,
|
||||
ichidanVerbNegativeTe,
|
||||
ichidanVerbNegativeConditional,
|
||||
ichidanVerbNegativeVolitional,
|
||||
ichidanVerbNegativePotential,
|
||||
ichidanVerbNegativePassive,
|
||||
ichidanVerbNegativeCausative,
|
||||
ichidanVerbNegativeCausativePassive,
|
||||
ichidanVerbNegativeImperative,
|
||||
ichidanVerbDesire,
|
||||
ichidanVerbNegativeDesire,
|
||||
ichidanVerbPastDesire,
|
||||
ichidanVerbNegativePastDesire,
|
||||
];
|
||||
final List<LemmatizationRule> ichidanVerbLemmatizationRules =
|
||||
List.unmodifiable([
|
||||
ichidanVerbBase,
|
||||
ichidanVerbNegative,
|
||||
ichidanVerbPast,
|
||||
ichidanVerbTe,
|
||||
ichidanVerbTeiru,
|
||||
ichidanVerbTeita,
|
||||
ichidanVerbConditional,
|
||||
ichidanVerbVolitional,
|
||||
ichidanVerbPotential,
|
||||
ichidanVerbPassive,
|
||||
ichidanVerbCausative,
|
||||
ichidanVerbCausativePassive,
|
||||
ichidanVerbImperative,
|
||||
ichidanVerbNegativePast,
|
||||
ichidanVerbNegativeTe,
|
||||
ichidanVerbNegativeConditional,
|
||||
ichidanVerbNegativeVolitional,
|
||||
ichidanVerbNegativePotential,
|
||||
ichidanVerbNegativePassive,
|
||||
ichidanVerbNegativeCausative,
|
||||
ichidanVerbNegativeCausativePassive,
|
||||
ichidanVerbNegativeImperative,
|
||||
ichidanVerbDesire,
|
||||
ichidanVerbNegativeDesire,
|
||||
ichidanVerbPastDesire,
|
||||
ichidanVerbNegativePastDesire,
|
||||
]);
|
||||
|
||||
Reference in New Issue
Block a user