332 lines
9.8 KiB
Dart
332 lines
9.8 KiB
Dart
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
|
import 'package:jadb/util/text_filtering.dart';
|
|
|
|
/// Patterns shared by every ichidan rule in this file as their
/// `lookAheadBehind` argument.
///
/// Contains [kanjiRegex] followed by i-row and e-row hiragana. Presumably
/// these describe what may sit directly before the inflectional ending —
/// confirm against [LemmatizationRule.simple].
///
/// NOTE(review): the bare vowels 'い' / 'え' (as in 用いる, 考える) and 'ぺ' are
/// not listed; verify whether that omission is intentional.
final List<Pattern> _lookBehinds = [
  kanjiRegex,
  // i-row kana.
  'き',
  'ぎ',
  'し',
  'じ',
  'ち',
  'ぢ',
  'に',
  'ひ',
  'び',
  // Was a second 'び'; the ひ/び/ぴ sequence shows 'ぴ' was intended.
  'ぴ',
  'み',
  'り',
  // e-row kana.
  'け',
  'げ',
  'せ',
  'ぜ',
  'て',
  'で',
  'ね',
  'へ',
  'べ',
  'め',
  'れ',
];
|
|
|
|
/// Rule for the dictionary (base) form of ichidan verbs.
///
/// Both the pattern and the replacement are `る`, so the ending is kept as
/// is (e.g. 食べる). This is the only rule here created with
/// `terminal: true`; presumably that stops further rule chaining — confirm
/// against [LemmatizationRule.simple].
final LemmatizationRule ichidanVerbBase = LemmatizationRule.simple(
  name: 'Ichidan verb - base form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'る',
  replacement: 'る',
  terminal: true,
);
|
|
|
|
/// Rule for the plain negative form of ichidan verbs.
///
/// Pairs the ending `ない` with the replacement `る` (e.g. 食べない has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbNegative = LemmatizationRule.simple(
  name: 'Ichidan verb - negative form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'ない',
  replacement: 'る',
);
|
|
/// Rule for the plain past form of ichidan verbs.
///
/// Pairs the ending `た` with the replacement `る` (e.g. 食べた has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbPast = LemmatizationRule.simple(
  name: 'Ichidan verb - past form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'た',
  replacement: 'る',
);
|
|
|
|
/// Rule for the te-form of ichidan verbs.
///
/// Pairs the ending `て` with the replacement `る` (e.g. 食べて has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbTe = LemmatizationRule.simple(
  name: 'Ichidan verb - te-form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'て',
  replacement: 'る',
);
|
|
|
|
/// Rule for the te-form followed by いる (progressive) of ichidan verbs.
///
/// Pairs the ending `ている` with the replacement `る` (e.g. 食べている has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbTeiru = LemmatizationRule.simple(
  name: 'Ichidan verb - te-form with いる',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'ている',
  replacement: 'る',
);
|
|
|
|
/// Rule for the te-form followed by いた (past progressive) of ichidan verbs.
///
/// Pairs the ending `ていた` with the replacement `る` (e.g. 食べていた has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbTeita = LemmatizationRule.simple(
  name: 'Ichidan verb - te-form with いた',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'ていた',
  replacement: 'る',
);
|
|
|
|
/// Rule for the ば-conditional form of ichidan verbs.
///
/// Pairs the ending `れば` with the replacement `る` (e.g. 食べれば has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbConditional = LemmatizationRule.simple(
  name: 'Ichidan verb - conditional form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'れば',
  replacement: 'る',
);
|
|
|
|
/// Rule for the volitional form of ichidan verbs.
///
/// Pairs the ending `よう` with the replacement `る` (e.g. 食べよう has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbVolitional = LemmatizationRule.simple(
  name: 'Ichidan verb - volitional form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'よう',
  replacement: 'る',
);
|
|
|
|
/// Rule for the potential form of ichidan verbs.
///
/// Pairs the ending `られる` with the replacement `る` (e.g. 食べられる has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
/// The pattern is the same as the one used by [ichidanVerbPassive]; only the
/// rule name differs.
final LemmatizationRule ichidanVerbPotential = LemmatizationRule.simple(
  name: 'Ichidan verb - potential form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'られる',
  replacement: 'る',
);
|
|
|
|
/// Rule for the passive form of ichidan verbs.
///
/// Pairs the ending `られる` with the replacement `る` (e.g. 食べられる has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
/// The pattern is the same as the one used by [ichidanVerbPotential]; only
/// the rule name differs.
final LemmatizationRule ichidanVerbPassive = LemmatizationRule.simple(
  name: 'Ichidan verb - passive form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'られる',
  replacement: 'る',
);
|
|
|
|
/// Rule for the causative form of ichidan verbs.
///
/// Pairs the ending `させる` with the replacement `る` (e.g. 食べさせる has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbCausative = LemmatizationRule.simple(
  name: 'Ichidan verb - causative form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'させる',
  replacement: 'る',
);
|
|
|
|
/// Rule for the causative-passive form of ichidan verbs.
///
/// Pairs the ending `させられる` with the replacement `る` (e.g. 食べさせられる
/// has the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
final LemmatizationRule ichidanVerbCausativePassive = LemmatizationRule.simple(
  name: 'Ichidan verb - causative passive form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'させられる',
  replacement: 'る',
);
|
|
|
|
/// Rule for the plain imperative form of ichidan verbs.
///
/// Pairs the ending `ろ` with the replacement `る` (e.g. 食べろ has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
///
/// The pattern used to be `れ`, which is the imperative ending of godan
/// る-verbs (走る → 走れ), not of ichidan verbs.
///
/// NOTE(review): the literary imperative in `よ` (食べよ) is not covered by
/// this rule; add a separate rule if that form is needed.
final LemmatizationRule ichidanVerbImperative = LemmatizationRule.simple(
  name: 'Ichidan verb - imperative form',
  pattern: 'ろ',
  replacement: 'る',
  lookAheadBehind: _lookBehinds,
  validChildClasses: {WordClass.ichidanVerb},
  wordClass: WordClass.ichidanVerb,
);
|
|
|
|
/// Rule for the negative past form of ichidan verbs.
///
/// Pairs the ending `なかった` with the replacement `る` (e.g. 食べなかった has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
final LemmatizationRule ichidanVerbNegativePast = LemmatizationRule.simple(
  name: 'Ichidan verb - negative past form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'なかった',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative te-form of ichidan verbs.
///
/// Pairs the ending `なくて` with the replacement `る` (e.g. 食べなくて has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbNegativeTe = LemmatizationRule.simple(
  name: 'Ichidan verb - negative te-form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'なくて',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative conditional form of ichidan verbs.
///
/// Pairs the ending `なければ` with the replacement `る` (e.g. 食べなければ has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
final LemmatizationRule ichidanVerbNegativeConditional =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative conditional form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'なければ',
  replacement: 'る',
);
|
|
|
|
/// Rule for the contracted negative conditional `なきゃ` of ichidan verbs.
///
/// Pairs the ending `なきゃ` with the replacement `る` (e.g. 食べなきゃ has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
///
/// NOTE(review): the `name` string is identical for all three informal
/// variants; confirm nothing relies on rule names being unique.
final LemmatizationRule ichidanVerbNegativeConditionalVariant1 =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative conditional form (informal variant)',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'なきゃ',
  replacement: 'る',
);
|
|
|
|
/// Rule for the contracted negative conditional `なくちゃ` of ichidan verbs.
///
/// Pairs the ending `なくちゃ` with the replacement `る` (e.g. 食べなくちゃ has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
///
/// NOTE(review): the `name` string is identical for all three informal
/// variants; confirm nothing relies on rule names being unique.
final LemmatizationRule ichidanVerbNegativeConditionalVariant2 =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative conditional form (informal variant)',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'なくちゃ',
  replacement: 'る',
);
|
|
|
|
/// Rule for the informal negative conditional `ないと` of ichidan verbs.
///
/// Pairs the ending `ないと` with the replacement `る` (e.g. 食べないと has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
///
/// NOTE(review): the `name` string is identical for all three informal
/// variants; confirm nothing relies on rule names being unique.
final LemmatizationRule ichidanVerbNegativeConditionalVariant3 =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative conditional form (informal variant)',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'ないと',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative volitional form of ichidan verbs.
///
/// Pairs the ending `なかろう` with the replacement `る` (e.g. 食べなかろう has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
final LemmatizationRule ichidanVerbNegativeVolitional =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative volitional form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'なかろう',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative potential form of ichidan verbs.
///
/// Pairs the ending `られない` with the replacement `る` (e.g. 食べられない has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns. The pattern is the same as the one used by
/// [ichidanVerbNegativePassive]; only the rule name differs.
final LemmatizationRule ichidanVerbNegativePotential = LemmatizationRule.simple(
  name: 'Ichidan verb - negative potential form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'られない',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative passive form of ichidan verbs.
///
/// Pairs the ending `られない` with the replacement `る` (e.g. 食べられない has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns. The pattern is the same as the one used by
/// [ichidanVerbNegativePotential]; only the rule name differs.
final LemmatizationRule ichidanVerbNegativePassive = LemmatizationRule.simple(
  name: 'Ichidan verb - negative passive form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'られない',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative causative form of ichidan verbs.
///
/// Pairs the ending `させない` with the replacement `る` (e.g. 食べさせない has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
final LemmatizationRule ichidanVerbNegativeCausative = LemmatizationRule.simple(
  name: 'Ichidan verb - negative causative form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'させない',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative causative-passive form of ichidan verbs.
///
/// Pairs the ending `させられない` with the replacement `る` (e.g.
/// 食べさせられない has the dictionary form 食べる) and passes [_lookBehinds]
/// as look-around patterns.
final LemmatizationRule ichidanVerbNegativeCausativePassive =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative causative passive form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'させられない',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative imperative (prohibitive) form of ichidan verbs.
///
/// Pairs the ending `るな` with the replacement `る` (e.g. 食べるな has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbNegativeImperative =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative imperative form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'るな',
  replacement: 'る',
);
|
|
|
|
/// Rule for the desiderative (たい) form of ichidan verbs.
///
/// Pairs the ending `たい` with the replacement `る` (e.g. 食べたい has the
/// dictionary form 食べる) and passes [_lookBehinds] as look-around patterns.
final LemmatizationRule ichidanVerbDesire = LemmatizationRule.simple(
  name: 'Ichidan verb - desire form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'たい',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative desiderative form of ichidan verbs.
///
/// Pairs the ending `たくない` with the replacement `る` (e.g. 食べたくない has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
final LemmatizationRule ichidanVerbNegativeDesire = LemmatizationRule.simple(
  name: 'Ichidan verb - negative desire form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'たくない',
  replacement: 'る',
);
|
|
|
|
/// Rule for the past desiderative form of ichidan verbs.
///
/// Pairs the ending `たかった` with the replacement `る` (e.g. 食べたかった has
/// the dictionary form 食べる) and passes [_lookBehinds] as look-around
/// patterns.
final LemmatizationRule ichidanVerbPastDesire = LemmatizationRule.simple(
  name: 'Ichidan verb - past desire form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'たかった',
  replacement: 'る',
);
|
|
|
|
/// Rule for the negative past desiderative form of ichidan verbs.
///
/// Pairs the ending `たくなかった` with the replacement `る` (e.g.
/// 食べたくなかった has the dictionary form 食べる) and passes [_lookBehinds]
/// as look-around patterns.
final LemmatizationRule ichidanVerbNegativePastDesire =
    LemmatizationRule.simple(
  name: 'Ichidan verb - negative past desire form',
  wordClass: WordClass.ichidanVerb,
  validChildClasses: {WordClass.ichidanVerb},
  lookAheadBehind: _lookBehinds,
  pattern: 'たくなかった',
  replacement: 'る',
);
|
|
|
|
/// Every ichidan-verb lemmatization rule declared in this file, exposed as
/// an unmodifiable list.
///
/// The rules appear in declaration order; the order is kept unchanged in
/// case consumers depend on it.
final List<LemmatizationRule> ichidanVerbLemmatizationRules =
    List<LemmatizationRule>.unmodifiable(<LemmatizationRule>[
  // Affirmative forms.
  ichidanVerbBase,
  ichidanVerbNegative,
  ichidanVerbPast,
  ichidanVerbTe,
  ichidanVerbTeiru,
  ichidanVerbTeita,
  ichidanVerbConditional,
  ichidanVerbVolitional,
  ichidanVerbPotential,
  ichidanVerbPassive,
  ichidanVerbCausative,
  ichidanVerbCausativePassive,
  ichidanVerbImperative,
  // Negative forms.
  ichidanVerbNegativePast,
  ichidanVerbNegativeTe,
  ichidanVerbNegativeConditional,
  ichidanVerbNegativeConditionalVariant1,
  ichidanVerbNegativeConditionalVariant2,
  ichidanVerbNegativeConditionalVariant3,
  ichidanVerbNegativeVolitional,
  ichidanVerbNegativePotential,
  ichidanVerbNegativePassive,
  ichidanVerbNegativeCausative,
  ichidanVerbNegativeCausativePassive,
  ichidanVerbNegativeImperative,
  // Desiderative (たい) forms.
  ichidanVerbDesire,
  ichidanVerbNegativeDesire,
  ichidanVerbPastDesire,
  ichidanVerbNegativePastDesire,
]);
|