Compare commits
92 Commits
tanos-jlpt
...
mugiten-v0
| Author | SHA1 | Date | |
|---|---|---|---|
|
4ee21d98e2
|
|||
|
7247af19cb
|
|||
|
ac7deae608
|
|||
|
7978b74f8d
|
|||
|
50870f64a0
|
|||
|
62d77749e6
|
|||
|
80b3610a72
|
|||
|
54705c3c10
|
|||
|
c7134f0d06
|
|||
|
aac9bf69f6
|
|||
|
189d4a95cf
|
|||
|
c32775ce7a
|
|||
|
78f546fa28
|
|||
|
e0a35bdef9
|
|||
|
8ec9771222
|
|||
|
e2fe033bf4
|
|||
|
a6a48c196d
|
|||
|
26618cc06d
|
|||
|
b855a1dc48
|
|||
|
f8813e0ae3
|
|||
|
bd0fee1b2d
|
|||
|
42e7c95f59
|
|||
|
b25cc85afe
|
|||
|
ec14016ab5
|
|||
|
6eee49d2d1
|
|||
|
f819280268
|
|||
|
03a8e11d91
|
|||
|
fdd15df344
|
|||
|
0ea8331298
|
|||
|
9215807b5c
|
|||
|
72a58bc299
|
|||
|
c208ef75f2
|
|||
|
bfcb2bfc97
|
|||
|
52a686ac29
|
|||
|
8bff2c6799
|
|||
|
b8eba03000
|
|||
|
4d75bef208
|
|||
|
08f25f6277
|
|||
|
7fee0435e8
|
|||
|
73640d01f6
|
|||
|
2875f7a65f
|
|||
|
155a3aa658
|
|||
|
0351b7a9df
|
|||
|
3cc61e6ba9
|
|||
|
ebe29db092
|
|||
|
128dd14a0d
|
|||
|
501d3a089e
|
|||
|
e0ffd89ff4
|
|||
|
e30ed8ba9b
|
|||
|
d508b5e244
|
|||
|
31c3fb807e
|
|||
|
60d2017473
|
|||
|
135d81db80
|
|||
|
f8549bf55a
|
|||
|
69d86b34ff
|
|||
|
6d7c068e7b
|
|||
|
b6661c734f
|
|||
|
080638e7ef
|
|||
|
03d536d7d1
|
|||
|
3f267b78d3
|
|||
|
84ae7eca9e
|
|||
|
45c4c5f09a
|
|||
|
369fcdbd4b
|
|||
|
33cf5028f4
|
|||
|
90d5717928
|
|||
|
bb9550380c
|
|||
|
3680827176
|
|||
|
9d9ce840fa
|
|||
|
6c580e95e2
|
|||
|
2db99e76f0
|
|||
|
42db69e57a
|
|||
|
4407c06f12
|
|||
|
fc0956d5c3
|
|||
|
d7f7f9cd19
|
|||
|
cf95f85caa
|
|||
|
f278b34415
|
|||
|
30d8160698
|
|||
|
b07fc8f4b3
|
|||
|
8299572225
|
|||
|
78ba1bae1a
|
|||
|
87383c8951
|
|||
|
cd9b318956
|
|||
|
96f52b5860
|
|||
|
59e8db5add
|
|||
|
9038119eb7
|
|||
|
3290d5dc91
|
|||
|
4647ab2286
|
|||
|
2980bcda06
|
|||
|
1661817819
|
|||
|
581f9daf25
|
|||
|
9898793bca
|
|||
|
2e7e8851e1
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,6 +6,7 @@
|
||||
.packages
|
||||
|
||||
# Conventional directory for build output.
|
||||
/doc/
|
||||
/build/
|
||||
main.db
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ import 'package:args/command_runner.dart';
|
||||
|
||||
import 'package:jadb/cli/commands/create_db.dart';
|
||||
import 'package:jadb/cli/commands/create_tanos_jlpt_mappings.dart';
|
||||
import 'package:jadb/cli/commands/lemmatize.dart';
|
||||
import 'package:jadb/cli/commands/query_kanji.dart';
|
||||
import 'package:jadb/cli/commands/query_word.dart';
|
||||
|
||||
@@ -14,6 +15,7 @@ Future<void> main(List<String> args) async {
|
||||
runner.addCommand(CreateDb());
|
||||
runner.addCommand(QueryKanji());
|
||||
runner.addCommand(QueryWord());
|
||||
runner.addCommand(Lemmatize());
|
||||
runner.addCommand(CreateTanosJlptMappings());
|
||||
|
||||
runner.run(args);
|
||||
|
||||
12
flake.lock
generated
12
flake.lock
generated
@@ -3,7 +3,7 @@
|
||||
"jmdict-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"narHash": "sha256-84P7r/fFlBnawy6yChrD9WMHmOWcEGWUmoK70N4rdGQ=",
|
||||
"narHash": "sha256-sLl+OrVBgc4QCOZ2cvWGLZBerHDLuApyQOQyDyLUHtk=",
|
||||
"type": "file",
|
||||
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
|
||||
},
|
||||
@@ -15,7 +15,7 @@
|
||||
"jmdict-with-examples-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"narHash": "sha256-PM0sv7VcsCya2Ek02CI7hVwB3Jawn6bICSI+dsJK0yo=",
|
||||
"narHash": "sha256-FQvkYXwgmCJ+ChVkoFzamlG8kyczHAgsJ3zJ6OvRLZc=",
|
||||
"type": "file",
|
||||
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
|
||||
},
|
||||
@@ -27,7 +27,7 @@
|
||||
"kanjidic2-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"narHash": "sha256-Lc0wUPpuDKuMDv2t87//w3z20RX8SMJI2iIRtUJ8fn0=",
|
||||
"narHash": "sha256-vyMpRnN9O3vCpvfVDACKdTlapBVx6yXg0X2tgXF2t+U=",
|
||||
"type": "file",
|
||||
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
|
||||
},
|
||||
@@ -38,11 +38,11 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1746904237,
|
||||
"narHash": "sha256-3e+AVBczosP5dCLQmMoMEogM57gmZ2qrVSrmq9aResQ=",
|
||||
"lastModified": 1751792365,
|
||||
"narHash": "sha256-J1kI6oAj25IG4EdVlg2hQz8NZTBNYvIS0l4wpr9KcUo=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "d89fc19e405cb2d55ce7cc114356846a0ee5e956",
|
||||
"rev": "1fd8bada0b6117e6c7eb54aad5813023eed37ccb",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
||||
23
flake.nix
23
flake.nix
@@ -81,6 +81,7 @@
|
||||
dart
|
||||
gnumake
|
||||
sqlite-interactive
|
||||
sqlite-analyzer
|
||||
sqlite-web
|
||||
sqlint
|
||||
sqlfluff
|
||||
@@ -103,10 +104,24 @@
|
||||
platforms = lib.platforms.all;
|
||||
};
|
||||
|
||||
src = lib.cleanSource ./.;
|
||||
src = builtins.filterSource (path: type: let
|
||||
baseName = baseNameOf (toString path);
|
||||
in !(lib.any (b: b) [
|
||||
(!(lib.cleanSourceFilter path type))
|
||||
(baseName == ".github" && type == "directory")
|
||||
(baseName == "nix" && type == "directory")
|
||||
(baseName == ".envrc" && type == "regular")
|
||||
(baseName == "flake.lock" && type == "regular")
|
||||
(baseName == "flake.nix" && type == "regular")
|
||||
])) ./.;
|
||||
|
||||
in forAllSystems (system: pkgs: {
|
||||
default = self.packages.${system}.database;
|
||||
|
||||
filteredSource = pkgs.runCommandLocal "filtered-source" { } ''
|
||||
ln -s ${src} $out
|
||||
'';
|
||||
|
||||
jmdict = pkgs.callPackage ./nix/jmdict.nix {
|
||||
inherit jmdict-src jmdict-with-examples-src edrdgMetadata;
|
||||
};
|
||||
@@ -128,6 +143,12 @@
|
||||
inherit src;
|
||||
};
|
||||
|
||||
database-wal = pkgs.callPackage ./nix/database.nix {
|
||||
inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2;
|
||||
inherit src;
|
||||
wal = true;
|
||||
};
|
||||
|
||||
docs = pkgs.callPackage ./nix/docs.nix {
|
||||
inherit (self.packages.${system}) database;
|
||||
};
|
||||
|
||||
@@ -97,7 +97,7 @@ class LanguageSource extends SQLWritable {
|
||||
|
||||
const LanguageSource({
|
||||
required this.language,
|
||||
this.phrase,
|
||||
required this.phrase,
|
||||
this.fullyDescribesSense = true,
|
||||
this.constructedFromSmallerWords = false,
|
||||
});
|
||||
@@ -161,7 +161,7 @@ class XRef {
|
||||
}
|
||||
|
||||
class Sense extends SQLWritable {
|
||||
final int id;
|
||||
final int senseId;
|
||||
final int orderNum;
|
||||
final List<XRefParts> antonyms;
|
||||
final List<String> dialects;
|
||||
@@ -176,7 +176,7 @@ class Sense extends SQLWritable {
|
||||
final List<XRefParts> seeAlso;
|
||||
|
||||
const Sense({
|
||||
required this.id,
|
||||
required this.senseId,
|
||||
required this.orderNum,
|
||||
this.antonyms = const [],
|
||||
this.dialects = const [],
|
||||
@@ -193,11 +193,12 @@ class Sense extends SQLWritable {
|
||||
|
||||
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
'id': id,
|
||||
'senseId': senseId,
|
||||
'orderNum': orderNum,
|
||||
};
|
||||
|
||||
bool get isEmpty => antonyms.isEmpty &&
|
||||
bool get isEmpty =>
|
||||
antonyms.isEmpty &&
|
||||
dialects.isEmpty &&
|
||||
fields.isEmpty &&
|
||||
info.isEmpty &&
|
||||
@@ -211,17 +212,17 @@ class Sense extends SQLWritable {
|
||||
}
|
||||
|
||||
class Entry extends SQLWritable {
|
||||
final int id;
|
||||
final int entryId;
|
||||
final List<KanjiElement> kanji;
|
||||
final List<ReadingElement> readings;
|
||||
final List<Sense> senses;
|
||||
|
||||
const Entry({
|
||||
required this.id,
|
||||
required this.entryId,
|
||||
required this.kanji,
|
||||
required this.readings,
|
||||
required this.senses,
|
||||
});
|
||||
|
||||
Map<String, Object?> get sqlValue => {'id': id};
|
||||
Map<String, Object?> get sqlValue => {'entryId': entryId};
|
||||
}
|
||||
|
||||
@@ -2,8 +2,7 @@ import 'dart:collection';
|
||||
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/_data_ingestion/jmdict/objects.dart';
|
||||
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
|
||||
import 'package:jadb/util/romaji_transliteration.dart';
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
class ResolvedXref {
|
||||
@@ -72,48 +71,47 @@ ResolvedXref resolveXref(
|
||||
Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
print(' [JMdict] Batch 1 - Kanji and readings');
|
||||
Batch b = db.batch();
|
||||
|
||||
int elementId = 0;
|
||||
for (final e in entries) {
|
||||
b.insert(JMdictTableNames.entry, e.sqlValue);
|
||||
|
||||
for (final k in e.kanji) {
|
||||
b.insert(JMdictTableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id}));
|
||||
// b.insert(
|
||||
// JMdictTableNames.entryByKana,
|
||||
// {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)},
|
||||
// // Some entries have the same reading twice with difference in katakana and hiragana
|
||||
// conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
// );
|
||||
elementId++;
|
||||
b.insert(
|
||||
JMdictTableNames.kanjiElement,
|
||||
k.sqlValue..addAll({
|
||||
'entryId': e.entryId,
|
||||
'elementId': elementId,
|
||||
}),
|
||||
);
|
||||
|
||||
for (final i in k.info) {
|
||||
b.insert(
|
||||
JMdictTableNames.kanjiInfo,
|
||||
{
|
||||
'entryId': e.id,
|
||||
'reading': k.reading,
|
||||
'elementId': elementId,
|
||||
'info': i,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (final r in e.readings) {
|
||||
elementId++;
|
||||
b.insert(
|
||||
JMdictTableNames.readingElement,
|
||||
r.sqlValue..addAll({'entryId': e.id}),
|
||||
r.sqlValue..addAll({
|
||||
'entryId': e.entryId,
|
||||
'elementId': elementId,
|
||||
}),
|
||||
);
|
||||
|
||||
b.insert(
|
||||
JMdictTableNames.entryByKana,
|
||||
{
|
||||
'entryId': e.id,
|
||||
'kana': transliterateKanaToLatin(r.reading),
|
||||
},
|
||||
// Some entries have the same reading twice with difference in katakana and hiragana
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
for (final i in r.info) {
|
||||
b.insert(
|
||||
JMdictTableNames.readingInfo,
|
||||
{
|
||||
'entryId': e.id,
|
||||
'reading': r.reading,
|
||||
'elementId': elementId,
|
||||
'info': i,
|
||||
},
|
||||
);
|
||||
@@ -122,79 +120,64 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
b.insert(
|
||||
JMdictTableNames.readingRestriction,
|
||||
{
|
||||
'entryId': e.id,
|
||||
'reading': r.reading,
|
||||
'elementId': elementId,
|
||||
'restriction': res,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (final s in e.senses) {
|
||||
for (final g in s.glossary) {
|
||||
b.insert(
|
||||
JMdictTableNames.entryByEnglish,
|
||||
{
|
||||
'entryId': e.id,
|
||||
'english': g.phrase,
|
||||
},
|
||||
// Some entries have the same reading twice with difference in katakana and hiragana
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await b.commit(noResult: true);
|
||||
|
||||
print(' [JMdict] Batch 2 - Senses');
|
||||
b = db.batch();
|
||||
|
||||
for (final e in entries) {
|
||||
for (final s in e.senses) {
|
||||
b.insert(JMdictTableNames.sense, s.sqlValue..addAll({'entryId': e.id}));
|
||||
b.insert(
|
||||
JMdictTableNames.sense, s.sqlValue..addAll({'entryId': e.entryId}));
|
||||
for (final d in s.dialects) {
|
||||
b.insert(JMdictTableNames.senseDialect, {'senseId': s.id, 'dialect': d});
|
||||
b.insert(
|
||||
JMdictTableNames.senseDialect,
|
||||
{'senseId': s.senseId, 'dialect': d},
|
||||
);
|
||||
}
|
||||
for (final f in s.fields) {
|
||||
b.insert(JMdictTableNames.senseField, {'senseId': s.id, 'field': f});
|
||||
b.insert(
|
||||
JMdictTableNames.senseField, {'senseId': s.senseId, 'field': f});
|
||||
}
|
||||
for (final i in s.info) {
|
||||
b.insert(JMdictTableNames.senseInfo, {'senseId': s.id, 'info': i});
|
||||
b.insert(JMdictTableNames.senseInfo, {'senseId': s.senseId, 'info': i});
|
||||
}
|
||||
for (final m in s.misc) {
|
||||
b.insert(JMdictTableNames.senseMisc, {'senseId': s.id, 'misc': m});
|
||||
b.insert(JMdictTableNames.senseMisc, {'senseId': s.senseId, 'misc': m});
|
||||
}
|
||||
for (final p in s.pos) {
|
||||
b.insert(JMdictTableNames.sensePOS, {'senseId': s.id, 'pos': p});
|
||||
}
|
||||
for (final l in s.languageSource) {
|
||||
b.insert(
|
||||
JMdictTableNames.senseLanguageSource,
|
||||
l.sqlValue..addAll({'senseId': s.id}),
|
||||
);
|
||||
b.insert(JMdictTableNames.sensePOS, {'senseId': s.senseId, 'pos': p});
|
||||
}
|
||||
for (final rk in s.restrictedToKanji) {
|
||||
b.insert(
|
||||
JMdictTableNames.senseRestrictedToKanji,
|
||||
{'entryId': e.id, 'senseId': s.id, 'kanji': rk},
|
||||
{'entryId': e.entryId, 'senseId': s.senseId, 'kanji': rk},
|
||||
);
|
||||
}
|
||||
for (final rr in s.restrictedToReading) {
|
||||
b.insert(
|
||||
JMdictTableNames.senseRestrictedToReading,
|
||||
{'entryId': e.id, 'senseId': s.id, 'reading': rr},
|
||||
{'entryId': e.entryId, 'senseId': s.senseId, 'reading': rr},
|
||||
);
|
||||
}
|
||||
for (final ls in s.languageSource) {
|
||||
b.insert(
|
||||
JMdictTableNames.senseLanguageSource,
|
||||
ls.sqlValue..addAll({'senseId': s.id}),
|
||||
ls.sqlValue..addAll({'senseId': s.senseId}),
|
||||
);
|
||||
}
|
||||
for (final g in s.glossary) {
|
||||
b.insert(
|
||||
JMdictTableNames.senseGlossary,
|
||||
g.sqlValue..addAll({'senseId': s.id}),
|
||||
g.sqlValue..addAll({'senseId': s.senseId}),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -204,6 +187,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
|
||||
print(' [JMdict] Building xref trees');
|
||||
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
|
||||
|
||||
for (final entry in entries) {
|
||||
for (final kanji in entry.kanji) {
|
||||
if (entriesByKanji.containsKey(kanji.reading)) {
|
||||
@@ -239,8 +223,8 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
b.insert(
|
||||
JMdictTableNames.senseSeeAlso,
|
||||
{
|
||||
'senseId': s.id,
|
||||
'xrefEntryId': resolvedEntry.entry.id,
|
||||
'senseId': s.senseId,
|
||||
'xrefEntryId': resolvedEntry.entry.entryId,
|
||||
'seeAlsoKanji': xref.kanjiRef,
|
||||
'seeAlsoReading': xref.readingRef,
|
||||
'seeAlsoSense': xref.senseOrderNum,
|
||||
@@ -257,8 +241,8 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
|
||||
);
|
||||
|
||||
b.insert(JMdictTableNames.senseAntonyms, {
|
||||
'senseId': s.id,
|
||||
'xrefEntryId': resolvedEntry.entry.id,
|
||||
'senseId': s.senseId,
|
||||
'xrefEntryId': resolvedEntry.entry.entryId,
|
||||
'antonymKanji': ant.kanjiRef,
|
||||
'antonymReading': ant.readingRef,
|
||||
'antonymSense': ant.senseOrderNum,
|
||||
|
||||
@@ -127,7 +127,7 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
|
||||
senseId++;
|
||||
final result = Sense(
|
||||
id: senseId,
|
||||
senseId: senseId,
|
||||
orderNum: orderNum + 1,
|
||||
restrictedToKanji:
|
||||
sense.findElements('stagk').map((e) => e.innerText).toList(),
|
||||
@@ -151,6 +151,7 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
.map(
|
||||
(e) => LanguageSource(
|
||||
language: e.getAttribute('xml:lang') ?? 'eng',
|
||||
phrase: e.innerText.isNotEmpty ? e.innerText : null,
|
||||
fullyDescribesSense: e.getAttribute('ls_type') == 'part',
|
||||
constructedFromSmallerWords: e.getAttribute('ls_wasei') == 'y',
|
||||
),
|
||||
@@ -189,7 +190,7 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
|
||||
entries.add(
|
||||
Entry(
|
||||
id: entryId,
|
||||
entryId: entryId,
|
||||
kanji: kanjiEls,
|
||||
readings: readingEls,
|
||||
senses: senses,
|
||||
|
||||
@@ -21,20 +21,17 @@ class CodePoint extends SQLWritable {
|
||||
|
||||
class Radical extends SQLWritable {
|
||||
final String kanji;
|
||||
final String type;
|
||||
final String radical;
|
||||
final int radicalId;
|
||||
|
||||
const Radical({
|
||||
required this.kanji,
|
||||
required this.type,
|
||||
required this.radical,
|
||||
required this.radicalId,
|
||||
});
|
||||
|
||||
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
'kanji': kanji,
|
||||
'type': type,
|
||||
'radical': radical,
|
||||
'radicalId': radicalId,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -224,7 +221,7 @@ class Character extends SQLWritable {
|
||||
|
||||
final List<String> radicalName;
|
||||
final List<CodePoint> codepoints;
|
||||
final List<Radical> radicals;
|
||||
final Radical? radical;
|
||||
final List<int> strokeMiscounts;
|
||||
final List<Variant> variants;
|
||||
final List<DictionaryReference> dictionaryReferences;
|
||||
@@ -244,7 +241,7 @@ class Character extends SQLWritable {
|
||||
this.jlpt,
|
||||
this.radicalName = const [],
|
||||
this.codepoints = const [],
|
||||
this.radicals = const [],
|
||||
required this.radical,
|
||||
this.strokeMiscounts = const [],
|
||||
this.variants = const [],
|
||||
this.dictionaryReferences = const [],
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
import 'objects.dart';
|
||||
@@ -14,14 +14,24 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
|
||||
// print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
|
||||
// }
|
||||
b.insert(KANJIDICTableNames.character, c.sqlValue);
|
||||
|
||||
for (final n in c.radicalName) {
|
||||
b.insert(KANJIDICTableNames.radicalName, {'kanji': c.literal, 'name': n});
|
||||
assert(c.radical != null, 'Radical name without radical');
|
||||
b.insert(
|
||||
KANJIDICTableNames.radicalName,
|
||||
{
|
||||
'radicalId': c.radical!.radicalId,
|
||||
'name': n,
|
||||
},
|
||||
conflictAlgorithm: ConflictAlgorithm.ignore,
|
||||
);
|
||||
}
|
||||
|
||||
for (final cp in c.codepoints) {
|
||||
b.insert(KANJIDICTableNames.codepoint, cp.sqlValue);
|
||||
}
|
||||
for (final r in c.radicals) {
|
||||
b.insert(KANJIDICTableNames.radical, r.sqlValue);
|
||||
if (c.radical != null) {
|
||||
b.insert(KANJIDICTableNames.radical, c.radical!.sqlValue);
|
||||
}
|
||||
for (final sm in c.strokeMiscounts) {
|
||||
b.insert(
|
||||
@@ -52,14 +62,17 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
|
||||
for (final r in c.readings) {
|
||||
b.insert(KANJIDICTableNames.reading, r.sqlValue);
|
||||
}
|
||||
for (final k in c.kunyomi) {
|
||||
b.insert(KANJIDICTableNames.kunyomi, k.sqlValue);
|
||||
for (final (i, y) in c.kunyomi.indexed) {
|
||||
b.insert(
|
||||
KANJIDICTableNames.kunyomi, y.sqlValue..addAll({'orderNum': i + 1}));
|
||||
}
|
||||
for (final o in c.onyomi) {
|
||||
b.insert(KANJIDICTableNames.onyomi, o.sqlValue);
|
||||
for (final (i, y) in c.onyomi.indexed) {
|
||||
b.insert(
|
||||
KANJIDICTableNames.onyomi, y.sqlValue..addAll({'orderNum': i + 1}));
|
||||
}
|
||||
for (final m in c.meanings) {
|
||||
b.insert(KANJIDICTableNames.meaning, m.sqlValue);
|
||||
for (final (i, m) in c.meanings.indexed) {
|
||||
b.insert(
|
||||
KANJIDICTableNames.meaning, m.sqlValue..addAll({'orderNum': i + 1}));
|
||||
}
|
||||
for (final n in c.nanori) {
|
||||
b.insert(
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import 'package:jadb/_data_ingestion/kanjidic/objects.dart';
|
||||
import 'package:jadb/util/romaji_transliteration.dart';
|
||||
import 'package:xml/xml.dart';
|
||||
|
||||
List<Character> parseKANJIDICData(XmlElement root) {
|
||||
@@ -27,11 +28,8 @@ List<Character> parseKANJIDICData(XmlElement root) {
|
||||
jlpt: int.tryParse(
|
||||
misc.findElements('jlpt').firstOrNull?.innerText ?? '',
|
||||
),
|
||||
radicalName: radical
|
||||
?.findElements('rad_name')
|
||||
.map((e) => e.innerText)
|
||||
.toList() ??
|
||||
[],
|
||||
radicalName:
|
||||
misc.findElements('rad_name').map((e) => e.innerText).toList(),
|
||||
codepoints: codepoint
|
||||
?.findElements('cp_value')
|
||||
.map(
|
||||
@@ -43,17 +41,16 @@ List<Character> parseKANJIDICData(XmlElement root) {
|
||||
)
|
||||
.toList() ??
|
||||
[],
|
||||
radicals: radical
|
||||
?.findElements('rad_value')
|
||||
.map(
|
||||
(e) => Radical(
|
||||
kanji: kanji,
|
||||
type: e.getAttribute('rad_type')!,
|
||||
radical: e.innerText,
|
||||
),
|
||||
)
|
||||
.toList() ??
|
||||
[],
|
||||
radical: radical
|
||||
?.findElements('rad_value')
|
||||
.where((e) => e.getAttribute('rad_type') == 'classical')
|
||||
.map(
|
||||
(e) => Radical(
|
||||
kanji: kanji,
|
||||
radicalId: int.parse(e.innerText),
|
||||
),
|
||||
)
|
||||
.firstOrNull,
|
||||
strokeMiscounts: misc
|
||||
.findElements('stroke_count')
|
||||
.skip(1)
|
||||
@@ -138,7 +135,7 @@ List<Character> parseKANJIDICData(XmlElement root) {
|
||||
.map(
|
||||
(e) => Onyomi(
|
||||
kanji: kanji,
|
||||
yomi: e.innerText,
|
||||
yomi: transliterateKatakanaToHiragana(e.innerText),
|
||||
isJouyou: e.getAttribute('r_status') == 'jy',
|
||||
type: e.getAttribute('on_type')),
|
||||
)
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import 'dart:ffi';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
|
||||
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
|
||||
import 'package:sqlite3/open.dart';
|
||||
|
||||
@@ -12,7 +9,8 @@ Future<Database> openLocalDb({
|
||||
String? libsqlitePath,
|
||||
String? jadbPath,
|
||||
bool readWrite = false,
|
||||
bool assertTablesExist = true,
|
||||
bool verifyTablesExist = true,
|
||||
bool walMode = false,
|
||||
}) async {
|
||||
libsqlitePath ??= Platform.environment['LIBSQLITE_PATH'];
|
||||
jadbPath ??= Platform.environment['JADB_PATH'];
|
||||
@@ -41,50 +39,19 @@ Future<Database> openLocalDb({
|
||||
).openDatabase(
|
||||
jadbPath,
|
||||
options: OpenDatabaseOptions(
|
||||
onOpen: (db) {
|
||||
db.execute("PRAGMA foreign_keys = ON");
|
||||
onConfigure: (db) async {
|
||||
if (walMode) {
|
||||
await db.execute("PRAGMA journal_mode = WAL");
|
||||
}
|
||||
await db.execute("PRAGMA foreign_keys = ON");
|
||||
},
|
||||
readOnly: !readWrite,
|
||||
),
|
||||
);
|
||||
|
||||
if (assertTablesExist) {
|
||||
await _assertTablesExist(db);
|
||||
if (verifyTablesExist) {
|
||||
await db.jadbVerifyTables();
|
||||
}
|
||||
|
||||
return db;
|
||||
}
|
||||
|
||||
Future<void> _assertTablesExist(Database db) async {
|
||||
final Set<String> tables = await db
|
||||
.query(
|
||||
'sqlite_master',
|
||||
columns: ['name'],
|
||||
where: 'type = ?',
|
||||
whereArgs: ['table'],
|
||||
)
|
||||
.then((result) {
|
||||
return result.map((row) => row['name'] as String).toSet();
|
||||
});
|
||||
|
||||
final Set<String> expectedTables = {
|
||||
...JMdictTableNames.allTables,
|
||||
...KANJIDICTableNames.allTables,
|
||||
...RADKFILETableNames.allTables,
|
||||
...TanosJLPTTableNames.allTables,
|
||||
};
|
||||
|
||||
final missingTables = expectedTables.difference(tables);
|
||||
|
||||
if (missingTables.isNotEmpty) {
|
||||
throw Exception([
|
||||
'Missing tables:',
|
||||
missingTables.map((table) => ' - $table').join('\n'),
|
||||
'',
|
||||
'Found tables:\n',
|
||||
tables.map((table) => ' - $table').join('\n'),
|
||||
'',
|
||||
'Please ensure the database is correctly set up.',
|
||||
].join('\n'));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
Future<void> seedRADKFILEData(
|
||||
|
||||
@@ -17,6 +17,9 @@ Future<void> seedData(Database db) async {
|
||||
await parseAndSeedDataFromRADKFILE(db);
|
||||
await parseAndSeedDataFromKANJIDIC(db);
|
||||
await parseAndSeedDataFromTanosJLPT(db);
|
||||
|
||||
print('Performing VACUUM');
|
||||
await db.execute('VACUUM');
|
||||
}
|
||||
|
||||
Future<void> parseAndSeedDataFromJMdict(Database db) async {
|
||||
|
||||
@@ -3,7 +3,7 @@ abstract class SQLWritable {
|
||||
const SQLWritable();
|
||||
|
||||
/// Returns a map of the object's properties and their values.
|
||||
///
|
||||
///
|
||||
/// Note that there might be properties in the object which is meant to be
|
||||
/// inserted into a different table. These properties will/should be excluded
|
||||
/// from this map.
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/overrides.dart';
|
||||
import 'package:jadb/util/sqlite_utils.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
Future<List<int>> _findReadingCandidates(
|
||||
@@ -13,7 +12,8 @@ Future<List<int>> _findReadingCandidates(
|
||||
JMdictTableNames.readingElement,
|
||||
columns: ['entryId'],
|
||||
where:
|
||||
'reading IN (${word.readings.map((e) => escapeStringValue(e)).join(',')})',
|
||||
'"reading" IN (${List.filled(word.readings.length, '?').join(',')})',
|
||||
whereArgs: [...word.readings],
|
||||
)
|
||||
.then((rows) => rows.map((row) => row['entryId'] as int).toList());
|
||||
|
||||
@@ -34,14 +34,20 @@ Future<List<(int, String)>> _findSenseCandidates(
|
||||
JLPTRankedWord word,
|
||||
Database db,
|
||||
) =>
|
||||
db
|
||||
.rawQuery('SELECT entryId, phrase '
|
||||
'FROM ${JMdictTableNames.senseGlossary} '
|
||||
'JOIN ${JMdictTableNames.sense} ON ${JMdictTableNames.senseGlossary}.senseId = ${JMdictTableNames.sense}.id '
|
||||
'WHERE phrase IN (${word.meanings.map((e) => escapeStringValue(e)).join(',')})')
|
||||
.then((rows) => rows
|
||||
.map((row) => (row['entryId'] as int, row['phrase'] as String))
|
||||
.toList());
|
||||
db.rawQuery(
|
||||
'SELECT entryId, phrase '
|
||||
'FROM "${JMdictTableNames.senseGlossary}" '
|
||||
'JOIN "${JMdictTableNames.sense}" USING (senseId)'
|
||||
'WHERE phrase IN (${List.filled(
|
||||
word.meanings.length,
|
||||
'?',
|
||||
).join(',')})',
|
||||
[...word.meanings],
|
||||
).then(
|
||||
(rows) => rows
|
||||
.map((row) => (row['entryId'] as int, row['phrase'] as String))
|
||||
.toList(),
|
||||
);
|
||||
|
||||
Future<int?> findEntry(
|
||||
JLPTRankedWord word,
|
||||
@@ -123,7 +129,9 @@ Future<Map<String, Set<int>>> resolveAllEntries(
|
||||
if (resolved != null) {
|
||||
result[word.jlptLevel]!.add(resolved);
|
||||
}
|
||||
} catch (e) {}
|
||||
} catch (e) {
|
||||
print('ERROR: $e');
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
|
||||
import 'package:jadb/table_names/tanos_jlpt.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
Future<void> seedTanosJLPTData(
|
||||
|
||||
@@ -12,6 +12,15 @@ class CreateDb extends Command {
|
||||
|
||||
CreateDb() {
|
||||
addLibsqliteArg(argParser);
|
||||
argParser.addFlag(
|
||||
'wal',
|
||||
help: '''Whether to use Write-Ahead Logging (WAL) mode.
|
||||
|
||||
This is recommended for better performance, but may not be used with
|
||||
the readonly NixOS store.
|
||||
''',
|
||||
defaultsTo: false,
|
||||
);
|
||||
}
|
||||
|
||||
Future<void> run() async {
|
||||
@@ -22,15 +31,23 @@ class CreateDb extends Command {
|
||||
|
||||
final db = await openLocalDb(
|
||||
libsqlitePath: argResults!.option('libsqlite')!,
|
||||
walMode: argResults!.flag('wal'),
|
||||
readWrite: true,
|
||||
);
|
||||
|
||||
seedData(db).then((_) {
|
||||
bool failed = false;
|
||||
await seedData(db).then((_) {
|
||||
print("Database created successfully");
|
||||
}).catchError((error) {
|
||||
print("Error creating database: $error");
|
||||
failed = true;
|
||||
}).whenComplete(() {
|
||||
db.close();
|
||||
});
|
||||
if (failed) {
|
||||
exit(1);
|
||||
} else {
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
46
lib/cli/commands/lemmatize.dart
Normal file
46
lib/cli/commands/lemmatize.dart
Normal file
@@ -0,0 +1,46 @@
|
||||
// import 'dart:io';
|
||||
|
||||
// import 'package:jadb/_data_ingestion/open_local_db.dart';
|
||||
import 'package:jadb/cli/args.dart';
|
||||
|
||||
import 'package:args/command_runner.dart';
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
|
||||
class Lemmatize extends Command {
|
||||
final name = "lemmatize";
|
||||
final description = "Lemmatize a word using the Jadb lemmatizer";
|
||||
|
||||
Lemmatize() {
|
||||
addLibsqliteArg(argParser);
|
||||
addJadbArg(argParser);
|
||||
argParser.addOption(
|
||||
'word',
|
||||
abbr: 'w',
|
||||
help: 'The word to search for.',
|
||||
valueHelp: 'WORD',
|
||||
);
|
||||
}
|
||||
|
||||
Future<void> run() async {
|
||||
// if (argResults!.option('libsqlite') == null ||
|
||||
// argResults!.option('jadb') == null) {
|
||||
// print(argParser.usage);
|
||||
// exit(64);
|
||||
// }
|
||||
|
||||
// final db = await openLocalDb(
|
||||
// jadbPath: argResults!.option('jadb')!,
|
||||
// libsqlitePath: argResults!.option('libsqlite')!,
|
||||
// );
|
||||
|
||||
final String searchWord = argResults!.option('word') ?? '食べたくない';
|
||||
|
||||
final time = Stopwatch()..start();
|
||||
final result = lemmatize(searchWord);
|
||||
time.stop();
|
||||
|
||||
print(result.toString());
|
||||
|
||||
print("Lemmatization took ${time.elapsedMilliseconds}ms");
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
import 'dart:convert';
|
||||
import 'dart:io';
|
||||
|
||||
@@ -11,6 +10,7 @@ import 'package:args/command_runner.dart';
|
||||
class QueryKanji extends Command {
|
||||
final name = "query-kanji";
|
||||
final description = "Query the database for kanji data";
|
||||
final invocation = "jadb query-kanji [options] <kanji>";
|
||||
|
||||
QueryKanji() {
|
||||
addLibsqliteArg(argParser);
|
||||
@@ -29,12 +29,25 @@ class QueryKanji extends Command {
|
||||
libsqlitePath: argResults!.option('libsqlite')!,
|
||||
);
|
||||
|
||||
final result = await JaDBConnection(db).searchKanji('漢');
|
||||
if (argResults!.rest.length != 1) {
|
||||
print('You need to provide exactly one kanji character to search for.');
|
||||
print('');
|
||||
printUsage();
|
||||
exit(64);
|
||||
}
|
||||
|
||||
final String kanji = argResults!.rest.first.trim();
|
||||
|
||||
final time = Stopwatch()..start();
|
||||
final result = await JaDBConnection(db).jadbSearchKanji(kanji);
|
||||
time.stop();
|
||||
|
||||
if (result == null) {
|
||||
print("No such kanji");
|
||||
} else {
|
||||
print(JsonEncoder.withIndent(' ').convert(result.toJson()));
|
||||
}
|
||||
|
||||
print("Query took ${time.elapsedMilliseconds}ms");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
|
||||
import 'dart:convert';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/_data_ingestion/open_local_db.dart';
|
||||
@@ -7,10 +5,12 @@ import 'package:jadb/cli/args.dart';
|
||||
import 'package:jadb/search.dart';
|
||||
|
||||
import 'package:args/command_runner.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
class QueryWord extends Command {
|
||||
final name = "query-word";
|
||||
final description = "Query the database for word data";
|
||||
final invocation = "jadb query-word [options] (<word> | <ID>)";
|
||||
|
||||
QueryWord() {
|
||||
addLibsqliteArg(argParser);
|
||||
@@ -20,7 +20,9 @@ class QueryWord extends Command {
|
||||
Future<void> run() async {
|
||||
if (argResults!.option('libsqlite') == null ||
|
||||
argResults!.option('jadb') == null) {
|
||||
print(argParser.usage);
|
||||
print("You need to provide both libsqlite and jadb paths.");
|
||||
print('');
|
||||
printUsage();
|
||||
exit(64);
|
||||
}
|
||||
|
||||
@@ -29,15 +31,59 @@ class QueryWord extends Command {
|
||||
libsqlitePath: argResults!.option('libsqlite')!,
|
||||
);
|
||||
|
||||
final result = await JaDBConnection(db).searchWord('kana');
|
||||
if (argResults!.rest.isEmpty) {
|
||||
print('You need to provide a word or ID to search for.');
|
||||
print('');
|
||||
printUsage();
|
||||
exit(64);
|
||||
}
|
||||
|
||||
final String searchWord = argResults!.rest.join(" ");
|
||||
final int? maybeId = int.tryParse(searchWord);
|
||||
|
||||
if (maybeId != null && maybeId >= 1000000) {
|
||||
await _searchId(db, maybeId);
|
||||
} else {
|
||||
await _searchWord(db, searchWord);
|
||||
}
|
||||
}
|
||||
|
||||
Future<void> _searchId(DatabaseExecutor db, int id) async {
|
||||
final time = Stopwatch()..start();
|
||||
final result = await JaDBConnection(db).jadbGetWordById(id);
|
||||
time.stop();
|
||||
|
||||
if (result == null) {
|
||||
print("Invalid ID");
|
||||
} else {
|
||||
print(result.toString());
|
||||
}
|
||||
|
||||
print("Query took ${time.elapsedMilliseconds}ms");
|
||||
}
|
||||
|
||||
Future<void> _searchWord(DatabaseExecutor db, String searchWord) async {
|
||||
final time = Stopwatch()..start();
|
||||
final count = await JaDBConnection(db).jadbSearchWordCount(searchWord);
|
||||
time.stop();
|
||||
|
||||
final time2 = Stopwatch()..start();
|
||||
final result = await JaDBConnection(db).jadbSearchWord(searchWord);
|
||||
time2.stop();
|
||||
|
||||
if (result == null) {
|
||||
print("Invalid search");
|
||||
} else if (result.isEmpty) {
|
||||
print("No matches");
|
||||
} else {
|
||||
print(JsonEncoder.withIndent(' ')
|
||||
.convert(result.map((e) => e.toJson()).toList()));
|
||||
for (final e in result) {
|
||||
print(e.toString());
|
||||
print("");
|
||||
}
|
||||
}
|
||||
|
||||
print("Total count: ${count}");
|
||||
print("Count query took ${time.elapsedMilliseconds}ms");
|
||||
print("Query took ${time2.elapsedMilliseconds}ms");
|
||||
}
|
||||
}
|
||||
|
||||
1872
lib/const_data/kanji_grades.dart
Normal file
1872
lib/const_data/kanji_grades.dart
Normal file
File diff suppressed because it is too large
Load Diff
217
lib/const_data/radicals.dart
Normal file
217
lib/const_data/radicals.dart
Normal file
@@ -0,0 +1,217 @@
|
||||
const Map<int, List<String>> RADICALS = {
|
||||
1: ['一', '|', '丶', 'ノ', '乙', '亅'],
|
||||
2: [
|
||||
'二',
|
||||
'亠',
|
||||
'人',
|
||||
'⺅',
|
||||
'𠆢',
|
||||
'儿',
|
||||
'入',
|
||||
'ハ',
|
||||
'丷',
|
||||
'冂',
|
||||
'冖',
|
||||
'冫',
|
||||
'几',
|
||||
'凵',
|
||||
'刀',
|
||||
'⺉',
|
||||
'力',
|
||||
'勹',
|
||||
'匕',
|
||||
'匚',
|
||||
'十',
|
||||
'卜',
|
||||
'卩',
|
||||
'厂',
|
||||
'厶',
|
||||
'又',
|
||||
'マ',
|
||||
'九',
|
||||
'ユ',
|
||||
'乃',
|
||||
'𠂉'
|
||||
],
|
||||
3: [
|
||||
'⻌',
|
||||
'口',
|
||||
'囗',
|
||||
'土',
|
||||
'士',
|
||||
'夂',
|
||||
'夕',
|
||||
'大',
|
||||
'女',
|
||||
'子',
|
||||
'宀',
|
||||
'寸',
|
||||
'小',
|
||||
'⺌',
|
||||
'尢',
|
||||
'尸',
|
||||
'屮',
|
||||
'山',
|
||||
'川',
|
||||
'巛',
|
||||
'工',
|
||||
'已',
|
||||
'巾',
|
||||
'干',
|
||||
'幺',
|
||||
'广',
|
||||
'廴',
|
||||
'廾',
|
||||
'弋',
|
||||
'弓',
|
||||
'ヨ',
|
||||
'彑',
|
||||
'彡',
|
||||
'彳',
|
||||
'⺖',
|
||||
'⺘',
|
||||
'⺡',
|
||||
'⺨',
|
||||
'⺾',
|
||||
'⻏',
|
||||
'⻖',
|
||||
'也',
|
||||
'亡',
|
||||
'及',
|
||||
'久'
|
||||
],
|
||||
4: [
|
||||
'⺹',
|
||||
'心',
|
||||
'戈',
|
||||
'戸',
|
||||
'手',
|
||||
'支',
|
||||
'攵',
|
||||
'文',
|
||||
'斗',
|
||||
'斤',
|
||||
'方',
|
||||
'无',
|
||||
'日',
|
||||
'曰',
|
||||
'月',
|
||||
'木',
|
||||
'欠',
|
||||
'止',
|
||||
'歹',
|
||||
'殳',
|
||||
'比',
|
||||
'毛',
|
||||
'氏',
|
||||
'气',
|
||||
'水',
|
||||
'火',
|
||||
'⺣',
|
||||
'爪',
|
||||
'父',
|
||||
'爻',
|
||||
'爿',
|
||||
'片',
|
||||
'牛',
|
||||
'犬',
|
||||
'⺭',
|
||||
'王',
|
||||
'元',
|
||||
'井',
|
||||
'勿',
|
||||
'尤',
|
||||
'五',
|
||||
'屯',
|
||||
'巴',
|
||||
'毋'
|
||||
],
|
||||
5: [
|
||||
'玄',
|
||||
'瓦',
|
||||
'甘',
|
||||
'生',
|
||||
'用',
|
||||
'田',
|
||||
'疋',
|
||||
'疒',
|
||||
'癶',
|
||||
'白',
|
||||
'皮',
|
||||
'皿',
|
||||
'目',
|
||||
'矛',
|
||||
'矢',
|
||||
'石',
|
||||
'示',
|
||||
'禸',
|
||||
'禾',
|
||||
'穴',
|
||||
'立',
|
||||
'⻂',
|
||||
'世',
|
||||
'巨',
|
||||
'冊',
|
||||
'母',
|
||||
'⺲',
|
||||
'牙'
|
||||
],
|
||||
6: [
|
||||
'瓜',
|
||||
'竹',
|
||||
'米',
|
||||
'糸',
|
||||
'缶',
|
||||
'羊',
|
||||
'羽',
|
||||
'而',
|
||||
'耒',
|
||||
'耳',
|
||||
'聿',
|
||||
'肉',
|
||||
'自',
|
||||
'至',
|
||||
'臼',
|
||||
'舌',
|
||||
'舟',
|
||||
'艮',
|
||||
'色',
|
||||
'虍',
|
||||
'虫',
|
||||
'血',
|
||||
'行',
|
||||
'衣',
|
||||
'西'
|
||||
],
|
||||
7: [
|
||||
'臣',
|
||||
'見',
|
||||
'角',
|
||||
'言',
|
||||
'谷',
|
||||
'豆',
|
||||
'豕',
|
||||
'豸',
|
||||
'貝',
|
||||
'赤',
|
||||
'走',
|
||||
'足',
|
||||
'身',
|
||||
'車',
|
||||
'辛',
|
||||
'辰',
|
||||
'酉',
|
||||
'釆',
|
||||
'里',
|
||||
'舛',
|
||||
'麦'
|
||||
],
|
||||
8: ['金', '長', '門', '隶', '隹', '雨', '青', '非', '奄', '岡', '免', '斉'],
|
||||
9: ['面', '革', '韭', '音', '頁', '風', '飛', '食', '首', '香', '品'],
|
||||
10: ['馬', '骨', '高', '髟', '鬥', '鬯', '鬲', '鬼', '竜', '韋'],
|
||||
11: ['魚', '鳥', '鹵', '鹿', '麻', '亀', '啇', '黄', '黒'],
|
||||
12: ['黍', '黹', '無', '歯'],
|
||||
13: ['黽', '鼎', '鼓', '鼠'],
|
||||
14: ['鼻', '齊'],
|
||||
17: ['龠'],
|
||||
};
|
||||
@@ -1 +1,55 @@
|
||||
enum JlptLevel { none, n5, n4, n3, n2, n1 }
|
||||
enum JlptLevel implements Comparable<JlptLevel> {
|
||||
none,
|
||||
n1,
|
||||
n2,
|
||||
n3,
|
||||
n4,
|
||||
n5;
|
||||
|
||||
factory JlptLevel.fromString(String? level) {
|
||||
switch (level?.toUpperCase()) {
|
||||
case 'N1':
|
||||
return JlptLevel.n1;
|
||||
case 'N2':
|
||||
return JlptLevel.n2;
|
||||
case 'N3':
|
||||
return JlptLevel.n3;
|
||||
case 'N4':
|
||||
return JlptLevel.n4;
|
||||
case 'N5':
|
||||
return JlptLevel.n5;
|
||||
default:
|
||||
return JlptLevel.none;
|
||||
}
|
||||
}
|
||||
|
||||
String? toNullableString() {
|
||||
switch (this) {
|
||||
case JlptLevel.n1:
|
||||
return 'N1';
|
||||
case JlptLevel.n2:
|
||||
return 'N2';
|
||||
case JlptLevel.n3:
|
||||
return 'N3';
|
||||
case JlptLevel.n4:
|
||||
return 'N4';
|
||||
case JlptLevel.n5:
|
||||
return 'N5';
|
||||
case JlptLevel.none:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
int? get asInt =>
|
||||
this == JlptLevel.none ? null : JlptLevel.values.indexOf(this);
|
||||
|
||||
String toString() => toNullableString() ?? 'N/A';
|
||||
|
||||
Object? toJson() => toNullableString();
|
||||
|
||||
factory JlptLevel.fromJson(Object? json) =>
|
||||
JlptLevel.fromString(json as String?);
|
||||
|
||||
@override
|
||||
int compareTo(JlptLevel other) => index - other.index;
|
||||
}
|
||||
|
||||
27
lib/models/create_empty_db.dart
Normal file
27
lib/models/create_empty_db.dart
Normal file
@@ -0,0 +1,27 @@
|
||||
import 'dart:io';
|
||||
import 'dart:isolate';
|
||||
import 'package:path/path.dart';
|
||||
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
String migrationDirPath() {
|
||||
final packageUri = Uri.parse('package:jadb/');
|
||||
final packagePath = Isolate.resolvePackageUriSync(packageUri);
|
||||
return packagePath!.resolve('../migrations').toFilePath();
|
||||
}
|
||||
|
||||
Future<void> createEmptyDb(DatabaseExecutor db) async {
|
||||
List<String> migrationFiles = [];
|
||||
for (final file in Directory(migrationDirPath()).listSync()) {
|
||||
if (file is File && file.path.endsWith('.sql')) {
|
||||
migrationFiles.add(file.path);
|
||||
}
|
||||
}
|
||||
|
||||
migrationFiles.sort((a, b) => basename(a).compareTo(basename(b)));
|
||||
|
||||
for (final file in migrationFiles) {
|
||||
final sql = await File(file).readAsString();
|
||||
await db.execute(sql);
|
||||
}
|
||||
}
|
||||
@@ -24,8 +24,7 @@ enum JMdictDialect {
|
||||
required this.description,
|
||||
});
|
||||
|
||||
static JMdictDialect fromId(String id) =>
|
||||
JMdictDialect.values.firstWhere(
|
||||
static JMdictDialect fromId(String id) => JMdictDialect.values.firstWhere(
|
||||
(e) => e.id == id,
|
||||
orElse: () => throw Exception('Unknown id: $id'),
|
||||
);
|
||||
|
||||
@@ -107,8 +107,7 @@ enum JMdictField {
|
||||
required this.description,
|
||||
});
|
||||
|
||||
static JMdictField fromId(String id) =>
|
||||
JMdictField.values.firstWhere(
|
||||
static JMdictField fromId(String id) => JMdictField.values.firstWhere(
|
||||
(e) => e.id == id,
|
||||
orElse: () => throw Exception('Unknown id: $id'),
|
||||
);
|
||||
|
||||
@@ -18,8 +18,7 @@ enum JMdictKanjiInfo {
|
||||
required this.description,
|
||||
});
|
||||
|
||||
static JMdictKanjiInfo fromId(String id) =>
|
||||
JMdictKanjiInfo.values.firstWhere(
|
||||
static JMdictKanjiInfo fromId(String id) => JMdictKanjiInfo.values.firstWhere(
|
||||
(e) => e.id == id,
|
||||
orElse: () => throw Exception('Unknown id: $id'),
|
||||
);
|
||||
|
||||
@@ -79,8 +79,7 @@ enum JMdictMisc {
|
||||
required this.description,
|
||||
});
|
||||
|
||||
static JMdictMisc fromId(String id) =>
|
||||
JMdictMisc.values.firstWhere(
|
||||
static JMdictMisc fromId(String id) => JMdictMisc.values.firstWhere(
|
||||
(e) => e.id == id,
|
||||
orElse: () => throw Exception('Unknown id: $id'),
|
||||
);
|
||||
|
||||
@@ -7,14 +7,21 @@ enum JMdictPOS {
|
||||
adjIx(id: 'adj-ix', description: 'adjective (keiyoushi) - yoi/ii class'),
|
||||
adjKari(id: 'adj-kari', description: '\'kari\' adjective (archaic)'),
|
||||
adjKu(id: 'adj-ku', description: '\'ku\' adjective (archaic)'),
|
||||
adjNa(id: 'adj-na', description: 'adjectival nouns or quasi-adjectives (keiyodoshi)'),
|
||||
adjNa(
|
||||
id: 'adj-na',
|
||||
description: 'adjectival nouns or quasi-adjectives (keiyodoshi)',
|
||||
),
|
||||
adjNari(id: 'adj-nari', description: 'archaic/formal form of na-adjective'),
|
||||
adjNo(id: 'adj-no', description: 'nouns which may take the genitive case particle ''no'''),
|
||||
adjNo(
|
||||
id: 'adj-no',
|
||||
description: 'nouns which may take the genitive case particle \'no\'',
|
||||
shortDescription: 'Na-adjective (keiyodoshi)',
|
||||
),
|
||||
adjPn(id: 'adj-pn', description: 'pre-noun adjectival (rentaishi)'),
|
||||
adjShiku(id: 'adj-shiku', description: '\'shiku\' adjective (archaic)'),
|
||||
adjT(id: 'adj-t', description: '\'taru\' adjective'),
|
||||
adv(id: 'adv', description: 'adverb (fukushi)'),
|
||||
advTo(id: 'adv-to', description: 'adverb taking the ''to'' particle'),
|
||||
advTo(id: 'adv-to', description: 'adverb taking the \'to\' particle'),
|
||||
aux(id: 'aux', description: 'auxiliary'),
|
||||
auxAdj(id: 'aux-adj', description: 'auxiliary adjective'),
|
||||
auxV(id: 'aux-v', description: 'auxiliary verb'),
|
||||
@@ -23,7 +30,11 @@ enum JMdictPOS {
|
||||
ctr(id: 'ctr', description: 'counter'),
|
||||
exp(id: 'exp', description: 'expressions (phrases, clauses, etc.)'),
|
||||
int(id: 'int', description: 'interjection (kandoushi)'),
|
||||
n(id: 'n', description: 'noun (common) (futsuumeishi)'),
|
||||
n(
|
||||
id: 'n',
|
||||
description: 'noun (common) (futsuumeishi)',
|
||||
shortDescription: 'noun',
|
||||
),
|
||||
nAdv(id: 'n-adv', description: 'adverbial noun (fukushitekimeishi)'),
|
||||
nPr(id: 'n-pr', description: 'proper noun'),
|
||||
nPref(id: 'n-pref', description: 'noun, used as a prefix'),
|
||||
@@ -38,73 +49,159 @@ enum JMdictPOS {
|
||||
vUnspec(id: 'v-unspec', description: 'verb unspecified'),
|
||||
v1(id: 'v1', description: 'Ichidan verb'),
|
||||
v1S(id: 'v1-s', description: 'Ichidan verb - kureru special class'),
|
||||
v2aS(id: 'v2a-s', description: 'Nidan verb with ''u'' ending (archaic)'),
|
||||
v2bK(id: 'v2b-k', description: 'Nidan verb (upper class) with ''bu'' ending (archaic)'),
|
||||
v2bS(id: 'v2b-s', description: 'Nidan verb (lower class) with ''bu'' ending (archaic)'),
|
||||
v2dK(id: 'v2d-k', description: 'Nidan verb (upper class) with ''dzu'' ending (archaic)'),
|
||||
v2dS(id: 'v2d-s', description: 'Nidan verb (lower class) with ''dzu'' ending (archaic)'),
|
||||
v2gK(id: 'v2g-k', description: 'Nidan verb (upper class) with ''gu'' ending (archaic)'),
|
||||
v2gS(id: 'v2g-s', description: 'Nidan verb (lower class) with ''gu'' ending (archaic)'),
|
||||
v2hK(id: 'v2h-k', description: 'Nidan verb (upper class) with ''hu/fu'' ending (archaic)'),
|
||||
v2hS(id: 'v2h-s', description: 'Nidan verb (lower class) with ''hu/fu'' ending (archaic)'),
|
||||
v2kK(id: 'v2k-k', description: 'Nidan verb (upper class) with ''ku'' ending (archaic)'),
|
||||
v2kS(id: 'v2k-s', description: 'Nidan verb (lower class) with ''ku'' ending (archaic)'),
|
||||
v2mK(id: 'v2m-k', description: 'Nidan verb (upper class) with ''mu'' ending (archaic)'),
|
||||
v2mS(id: 'v2m-s', description: 'Nidan verb (lower class) with ''mu'' ending (archaic)'),
|
||||
v2nS(id: 'v2n-s', description: 'Nidan verb (lower class) with ''nu'' ending (archaic)'),
|
||||
v2rK(id: 'v2r-k', description: 'Nidan verb (upper class) with ''ru'' ending (archaic)'),
|
||||
v2rS(id: 'v2r-s', description: 'Nidan verb (lower class) with ''ru'' ending (archaic)'),
|
||||
v2sS(id: 'v2s-s', description: 'Nidan verb (lower class) with ''su'' ending (archaic)'),
|
||||
v2tK(id: 'v2t-k', description: 'Nidan verb (upper class) with ''tsu'' ending (archaic)'),
|
||||
v2tS(id: 'v2t-s', description: 'Nidan verb (lower class) with ''tsu'' ending (archaic)'),
|
||||
v2wS(id: 'v2w-s', description: 'Nidan verb (lower class) with ''u'' ending and ''we'' conjugation (archaic)'),
|
||||
v2yK(id: 'v2y-k', description: 'Nidan verb (upper class) with ''yu'' ending (archaic)'),
|
||||
v2yS(id: 'v2y-s', description: 'Nidan verb (lower class) with ''yu'' ending (archaic)'),
|
||||
v2zS(id: 'v2z-s', description: 'Nidan verb (lower class) with ''zu'' ending (archaic)'),
|
||||
v4b(id: 'v4b', description: 'Yodan verb with ''bu'' ending (archaic)'),
|
||||
v4g(id: 'v4g', description: 'Yodan verb with ''gu'' ending (archaic)'),
|
||||
v4h(id: 'v4h', description: 'Yodan verb with ''hu/fu'' ending (archaic)'),
|
||||
v4k(id: 'v4k', description: 'Yodan verb with ''ku'' ending (archaic)'),
|
||||
v4m(id: 'v4m', description: 'Yodan verb with ''mu'' ending (archaic)'),
|
||||
v4n(id: 'v4n', description: 'Yodan verb with ''nu'' ending (archaic)'),
|
||||
v4r(id: 'v4r', description: 'Yodan verb with ''ru'' ending (archaic)'),
|
||||
v4s(id: 'v4s', description: 'Yodan verb with ''su'' ending (archaic)'),
|
||||
v4t(id: 'v4t', description: 'Yodan verb with ''tsu'' ending (archaic)'),
|
||||
v2aS(id: 'v2a-s', description: 'Nidan verb with \'u\' ending (archaic)'),
|
||||
v2bK(
|
||||
id: 'v2b-k',
|
||||
description: 'Nidan verb (upper class) with \'bu\' ending (archaic)',
|
||||
),
|
||||
v2bS(
|
||||
id: 'v2b-s',
|
||||
description: 'Nidan verb (lower class) with \'bu\' ending (archaic)',
|
||||
),
|
||||
v2dK(
|
||||
id: 'v2d-k',
|
||||
description: 'Nidan verb (upper class) with \'dzu\' ending (archaic)',
|
||||
),
|
||||
v2dS(
|
||||
id: 'v2d-s',
|
||||
description: 'Nidan verb (lower class) with \'dzu\' ending (archaic)',
|
||||
),
|
||||
v2gK(
|
||||
id: 'v2g-k',
|
||||
description: 'Nidan verb (upper class) with \'gu\' ending (archaic)',
|
||||
),
|
||||
v2gS(
|
||||
id: 'v2g-s',
|
||||
description: 'Nidan verb (lower class) with \'gu\' ending (archaic)',
|
||||
),
|
||||
v2hK(
|
||||
id: 'v2h-k',
|
||||
description: 'Nidan verb (upper class) with \'hu/fu\' ending (archaic)',
|
||||
),
|
||||
v2hS(
|
||||
id: 'v2h-s',
|
||||
description: 'Nidan verb (lower class) with \'hu/fu\' ending (archaic)',
|
||||
),
|
||||
v2kK(
|
||||
id: 'v2k-k',
|
||||
description: 'Nidan verb (upper class) with \'ku\' ending (archaic)',
|
||||
),
|
||||
v2kS(
|
||||
id: 'v2k-s',
|
||||
description: 'Nidan verb (lower class) with \'ku\' ending (archaic)',
|
||||
),
|
||||
v2mK(
|
||||
id: 'v2m-k',
|
||||
description: 'Nidan verb (upper class) with \'mu\' ending (archaic)',
|
||||
),
|
||||
v2mS(
|
||||
id: 'v2m-s',
|
||||
description: 'Nidan verb (lower class) with \'mu\' ending (archaic)',
|
||||
),
|
||||
v2nS(
|
||||
id: 'v2n-s',
|
||||
description: 'Nidan verb (lower class) with \'nu\' ending (archaic)',
|
||||
),
|
||||
v2rK(
|
||||
id: 'v2r-k',
|
||||
description: 'Nidan verb (upper class) with \'ru\' ending (archaic)',
|
||||
),
|
||||
v2rS(
|
||||
id: 'v2r-s',
|
||||
description: 'Nidan verb (lower class) with \'ru\' ending (archaic)',
|
||||
),
|
||||
v2sS(
|
||||
id: 'v2s-s',
|
||||
description: 'Nidan verb (lower class) with \'su\' ending (archaic)',
|
||||
),
|
||||
v2tK(
|
||||
id: 'v2t-k',
|
||||
description: 'Nidan verb (upper class) with \'tsu\' ending (archaic)',
|
||||
),
|
||||
v2tS(
|
||||
id: 'v2t-s',
|
||||
description: 'Nidan verb (lower class) with \'tsu\' ending (archaic)',
|
||||
),
|
||||
v2wS(
|
||||
id: 'v2w-s',
|
||||
description:
|
||||
'Nidan verb (lower class) with \'u\' ending and \'we\' conjugation (archaic)',
|
||||
),
|
||||
v2yK(
|
||||
id: 'v2y-k',
|
||||
description: 'Nidan verb (upper class) with \'yu\' ending (archaic)',
|
||||
),
|
||||
v2yS(
|
||||
id: 'v2y-s',
|
||||
description: 'Nidan verb (lower class) with \'yu\' ending (archaic)',
|
||||
),
|
||||
v2zS(
|
||||
id: 'v2z-s',
|
||||
description: 'Nidan verb (lower class) with \'zu\' ending (archaic)',
|
||||
),
|
||||
v4b(id: 'v4b', description: 'Yodan verb with \'bu\' ending (archaic)'),
|
||||
v4g(id: 'v4g', description: 'Yodan verb with \'gu\' ending (archaic)'),
|
||||
v4h(id: 'v4h', description: 'Yodan verb with \'hu/fu\' ending (archaic)'),
|
||||
v4k(id: 'v4k', description: 'Yodan verb with \'ku\' ending (archaic)'),
|
||||
v4m(id: 'v4m', description: 'Yodan verb with \'mu\' ending (archaic)'),
|
||||
v4n(id: 'v4n', description: 'Yodan verb with \'nu\' ending (archaic)'),
|
||||
v4r(id: 'v4r', description: 'Yodan verb with \'ru\' ending (archaic)'),
|
||||
v4s(id: 'v4s', description: 'Yodan verb with \'su\' ending (archaic)'),
|
||||
v4t(id: 'v4t', description: 'Yodan verb with \'tsu\' ending (archaic)'),
|
||||
v5aru(id: 'v5aru', description: 'Godan verb - -aru special class'),
|
||||
v5b(id: 'v5b', description: 'Godan verb with ''bu'' ending'),
|
||||
v5g(id: 'v5g', description: 'Godan verb with ''gu'' ending'),
|
||||
v5k(id: 'v5k', description: 'Godan verb with ''ku'' ending'),
|
||||
v5b(id: 'v5b', description: 'Godan verb with \'bu\' ending'),
|
||||
v5g(id: 'v5g', description: 'Godan verb with \'gu\' ending'),
|
||||
v5k(id: 'v5k', description: 'Godan verb with \'ku\' ending'),
|
||||
v5kS(id: 'v5k-s', description: 'Godan verb - Iku/Yuku special class'),
|
||||
v5m(id: 'v5m', description: 'Godan verb with ''mu'' ending'),
|
||||
v5n(id: 'v5n', description: 'Godan verb with ''nu'' ending'),
|
||||
v5r(id: 'v5r', description: 'Godan verb with ''ru'' ending'),
|
||||
v5rI(id: 'v5r-i', description: 'Godan verb with ''ru'' ending (irregular verb)'),
|
||||
v5s(id: 'v5s', description: 'Godan verb with ''su'' ending'),
|
||||
v5t(id: 'v5t', description: 'Godan verb with ''tsu'' ending'),
|
||||
v5u(id: 'v5u', description: 'Godan verb with ''u'' ending'),
|
||||
v5uS(id: 'v5u-s', description: 'Godan verb with ''u'' ending (special class)'),
|
||||
v5uru(id: 'v5uru', description: 'Godan verb - Uru old class verb (old form of Eru)'),
|
||||
v5m(id: 'v5m', description: 'Godan verb with \'mu\' ending'),
|
||||
v5n(id: 'v5n', description: 'Godan verb with \'nu\' ending'),
|
||||
v5r(id: 'v5r', description: 'Godan verb with \'ru\' ending'),
|
||||
v5rI(
|
||||
id: 'v5r-i',
|
||||
description: 'Godan verb with \'ru\' ending (irregular verb)',
|
||||
),
|
||||
v5s(id: 'v5s', description: 'Godan verb with \'su\' ending'),
|
||||
v5t(id: 'v5t', description: 'Godan verb with \'tsu\' ending'),
|
||||
v5u(id: 'v5u', description: 'Godan verb with \'u\' ending'),
|
||||
v5uS(
|
||||
id: 'v5u-s',
|
||||
description: 'Godan verb with \'u\' ending (special class)',
|
||||
),
|
||||
v5uru(
|
||||
id: 'v5uru',
|
||||
description: 'Godan verb - Uru old class verb (old form of Eru)',
|
||||
),
|
||||
vi(id: 'vi', description: 'intransitive verb'),
|
||||
vk(id: 'vk', description: 'Kuru verb - special class'),
|
||||
vn(id: 'vn', description: 'irregular nu verb'),
|
||||
vr(id: 'vr', description: 'irregular ru verb, plain form ends with -ri'),
|
||||
vs(id: 'vs', description: 'noun or participle which takes the aux. verb suru'),
|
||||
vsC(id: 'vs-c', description: 'suru verb - precursor to the modern suru'),
|
||||
vs(
|
||||
id: 'vs',
|
||||
description: 'noun or participle which takes the aux. verb suru',
|
||||
shortDescription: 'suru verb',
|
||||
),
|
||||
vsC(id: 'vs-c', description: 'su verb - precursor to the modern suru'),
|
||||
vsI(id: 'vs-i', description: 'suru verb - included'),
|
||||
vsS(id: 'vs-s', description: 'suru verb - special class'),
|
||||
vt(id: 'vt', description: 'transitive verb'),
|
||||
vz(id: 'vz', description: 'Ichidan verb - zuru verb (alternative form of -jiru verbs)');
|
||||
vz(
|
||||
id: 'vz',
|
||||
description: 'Ichidan verb - zuru verb (alternative form of -jiru verbs)',
|
||||
);
|
||||
|
||||
final String id;
|
||||
final String description;
|
||||
final String? _shortDescription;
|
||||
|
||||
const JMdictPOS({
|
||||
required this.id,
|
||||
required this.description,
|
||||
});
|
||||
String? shortDescription,
|
||||
}) : _shortDescription = shortDescription;
|
||||
|
||||
static JMdictPOS fromId(String id) =>
|
||||
JMdictPOS.values.firstWhere(
|
||||
String get shortDescription => _shortDescription ?? description;
|
||||
|
||||
static JMdictPOS fromId(String id) => JMdictPOS.values.firstWhere(
|
||||
(e) => e.id == id,
|
||||
orElse: () => throw Exception('Unknown id: $id'),
|
||||
);
|
||||
|
||||
@@ -4,37 +4,48 @@ class KanjiSearchRadical extends Equatable {
|
||||
/// The radical symbol.
|
||||
final String symbol;
|
||||
|
||||
/// The names of this radical.
|
||||
///
|
||||
/// Each name might refer to a specific form of the radical.
|
||||
final List<String> names;
|
||||
|
||||
/// The radical forms used in this kanji.
|
||||
///
|
||||
/// (e.g. "亻" for "人", "氵" for "水")
|
||||
final List<String> forms;
|
||||
|
||||
/// The meaning of the radical.
|
||||
final String meaning;
|
||||
/// The meanings of the radical.
|
||||
final List<String> meanings;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
const KanjiSearchRadical({
|
||||
required this.symbol,
|
||||
this.forms = const [],
|
||||
required this.meaning,
|
||||
required this.names,
|
||||
required this.forms,
|
||||
required this.meanings,
|
||||
});
|
||||
|
||||
@override
|
||||
List<Object> get props => [
|
||||
symbol,
|
||||
this.names,
|
||||
forms,
|
||||
meaning,
|
||||
meanings,
|
||||
];
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
'symbol': symbol,
|
||||
'names': names,
|
||||
'forms': forms,
|
||||
'meaning': meaning,
|
||||
'meanings': meanings,
|
||||
};
|
||||
|
||||
factory KanjiSearchRadical.fromJson(Map<String, dynamic> json) {
|
||||
return KanjiSearchRadical(
|
||||
symbol: json['symbol'] as String,
|
||||
names: (json['names'] as List).map((e) => e as String).toList(),
|
||||
forms: (json['forms'] as List).map((e) => e as String).toList(),
|
||||
meaning: json['meaning'] as String,
|
||||
meanings: (json['meanings'] as List).map((e) => e as String).toList(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,8 @@ class KanjiSearchResult extends Equatable {
|
||||
final String kanji;
|
||||
|
||||
/// The school level that the kanji is taught in, if applicable.
|
||||
final String? taughtIn;
|
||||
/// Ranges from `1` to `10` (except 7)
|
||||
final int? taughtIn;
|
||||
|
||||
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
|
||||
///
|
||||
@@ -38,23 +39,51 @@ class KanjiSearchResult extends Equatable {
|
||||
/// Information about this character's radical, if applicable.
|
||||
final KanjiSearchRadical? radical;
|
||||
|
||||
// TODO: document more accurately what kind of parts?
|
||||
/// The parts used in this kanji.
|
||||
/// All radicals/kanji parts that make up this kanji.
|
||||
///
|
||||
/// Note that this list might not always be complete.
|
||||
final List<String> parts;
|
||||
|
||||
/// Ids for the kanji's symbol in different encoding systems
|
||||
/// (e.g. JIS213, JIS208, UCS, etc.)
|
||||
final Map<String, String> codepoints;
|
||||
|
||||
/// The kanji's nanori readings.
|
||||
///
|
||||
/// Nanori readings are special readings of kanji used in names.
|
||||
final List<String> nanori;
|
||||
|
||||
/// How to read this kanji in different languages.
|
||||
final Map<String, List<String>> alternativeLanguageReadings;
|
||||
|
||||
/// Common miscounts of the kanji's strokes.
|
||||
final List<int> strokeMiscounts;
|
||||
|
||||
/// Query codes for looking up this kanji in different indexing systems.
|
||||
final Map<String, List<String>> queryCodes;
|
||||
|
||||
/// References to other dictionaries that contain this kanji.
|
||||
final Map<String, String> dictionaryReferences;
|
||||
|
||||
const KanjiSearchResult({
|
||||
required this.kanji,
|
||||
this.taughtIn,
|
||||
this.jlptLevel,
|
||||
this.newspaperFrequencyRank,
|
||||
required this.taughtIn,
|
||||
required this.jlptLevel,
|
||||
required this.newspaperFrequencyRank,
|
||||
required this.strokeCount,
|
||||
required this.meanings,
|
||||
this.kunyomi = const [],
|
||||
this.onyomi = const [],
|
||||
required this.kunyomi,
|
||||
required this.onyomi,
|
||||
// this.kunyomiExamples = const [],
|
||||
// this.onyomiExamples = const [],
|
||||
this.radical,
|
||||
this.parts = const [],
|
||||
required this.radical,
|
||||
required this.parts,
|
||||
required this.codepoints,
|
||||
required this.nanori,
|
||||
required this.alternativeLanguageReadings,
|
||||
required this.strokeMiscounts,
|
||||
required this.queryCodes,
|
||||
required this.dictionaryReferences,
|
||||
});
|
||||
|
||||
@override
|
||||
@@ -71,6 +100,13 @@ class KanjiSearchResult extends Equatable {
|
||||
// onyomiExamples,
|
||||
radical,
|
||||
parts,
|
||||
codepoints,
|
||||
kanji,
|
||||
nanori,
|
||||
alternativeLanguageReadings,
|
||||
strokeMiscounts,
|
||||
queryCodes,
|
||||
dictionaryReferences,
|
||||
];
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
@@ -86,16 +122,18 @@ class KanjiSearchResult extends Equatable {
|
||||
// 'kunyomiExamples': kunyomiExamples,
|
||||
'radical': radical?.toJson(),
|
||||
'parts': parts,
|
||||
// 'strokeOrderDiagramUri': strokeOrderDiagramUri,
|
||||
// 'strokeOrderSvgUri': strokeOrderSvgUri,
|
||||
// 'strokeOrderGifUri': strokeOrderGifUri,
|
||||
// 'uri': uri,
|
||||
'codepoints': codepoints,
|
||||
'nanori': nanori,
|
||||
'alternativeLanguageReadings': alternativeLanguageReadings,
|
||||
'strokeMiscounts': strokeMiscounts,
|
||||
'queryCodes': queryCodes,
|
||||
'dictionaryReferences': dictionaryReferences,
|
||||
};
|
||||
|
||||
factory KanjiSearchResult.fromJson(Map<String, dynamic> json) {
|
||||
return KanjiSearchResult(
|
||||
kanji: json['kanji'] as String,
|
||||
taughtIn: json['taughtIn'] as String?,
|
||||
taughtIn: json['taughtIn'] as int?,
|
||||
jlptLevel: json['jlptLevel'] as String?,
|
||||
newspaperFrequencyRank: json['newspaperFrequencyRank'] as int?,
|
||||
strokeCount: json['strokeCount'] as int,
|
||||
@@ -112,6 +150,29 @@ class KanjiSearchResult extends Equatable {
|
||||
? KanjiSearchRadical.fromJson(json['radical'])
|
||||
: null,
|
||||
parts: (json['parts'] as List).map((e) => e as String).toList(),
|
||||
codepoints: (json['codepoints'] as Map<String, dynamic>).map(
|
||||
(key, value) => MapEntry(key, value as String),
|
||||
),
|
||||
nanori: (json['nanori'] as List).map((e) => e as String).toList(),
|
||||
alternativeLanguageReadings:
|
||||
(json['alternativeLanguageReadings'] as Map<String, dynamic>).map(
|
||||
(key, value) => MapEntry(
|
||||
key,
|
||||
(value as List).map((e) => e as String).toList(),
|
||||
),
|
||||
),
|
||||
strokeMiscounts:
|
||||
(json['strokeMiscounts'] as List).map((e) => e as int).toList(),
|
||||
queryCodes: (json['queryCodes'] as Map<String, dynamic>).map(
|
||||
(key, value) => MapEntry(
|
||||
key,
|
||||
(value as List).map((e) => e as String).toList(),
|
||||
),
|
||||
),
|
||||
dictionaryReferences:
|
||||
(json['dictionaryReferences'] as Map<String, dynamic>).map(
|
||||
(key, value) => MapEntry(key, value as String),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
class RadicalsSearchResult {
|
||||
// TODO: implement me
|
||||
}
|
||||
39
lib/models/verify_tables.dart
Normal file
39
lib/models/verify_tables.dart
Normal file
@@ -0,0 +1,39 @@
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:jadb/table_names/tanos_jlpt.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
|
||||
final Set<String> tables = await db
|
||||
.query(
|
||||
'sqlite_master',
|
||||
columns: ['name'],
|
||||
where: 'type = ?',
|
||||
whereArgs: ['table'],
|
||||
)
|
||||
.then((result) {
|
||||
return result.map((row) => row['name'] as String).toSet();
|
||||
});
|
||||
|
||||
final Set<String> expectedTables = {
|
||||
...JMdictTableNames.allTables,
|
||||
...KANJIDICTableNames.allTables,
|
||||
...RADKFILETableNames.allTables,
|
||||
...TanosJLPTTableNames.allTables,
|
||||
};
|
||||
|
||||
final missingTables = expectedTables.difference(tables);
|
||||
|
||||
if (missingTables.isNotEmpty) {
|
||||
throw Exception([
|
||||
'Missing tables:',
|
||||
missingTables.map((table) => ' - $table').join('\n'),
|
||||
'',
|
||||
'Found tables:\n',
|
||||
tables.map((table) => ' - $table').join('\n'),
|
||||
'',
|
||||
'Please ensure the database is correctly set up.',
|
||||
].join('\n'));
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
import 'package:jadb/models/common/jlpt_level.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
|
||||
import 'package:jadb/models/word_search/word_search_ruby.dart';
|
||||
@@ -6,9 +7,15 @@ import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
|
||||
/// A class representing a single dictionary entry from a word search.
|
||||
class WordSearchResult {
|
||||
/// The score of the entry, used for sorting results.
|
||||
final int score;
|
||||
|
||||
/// The ID of the entry in the database.
|
||||
final int entryId;
|
||||
|
||||
/// Whether the word is common or not.
|
||||
final bool isCommon;
|
||||
|
||||
/// The variants of the word in Japanese.
|
||||
final List<WordSearchRuby> japanese;
|
||||
|
||||
@@ -21,32 +28,43 @@ class WordSearchResult {
|
||||
/// The meanings of the word, including parts of speech and other information.
|
||||
final List<WordSearchSense> senses;
|
||||
|
||||
/// The JLPT level of the word.
|
||||
final JlptLevel jlptLevel;
|
||||
|
||||
/// A class listing the sources used to make up the data for this word search result.
|
||||
final WordSearchSources sources;
|
||||
|
||||
const WordSearchResult({
|
||||
required this.score,
|
||||
required this.entryId,
|
||||
required this.isCommon,
|
||||
required this.japanese,
|
||||
required this.kanjiInfo,
|
||||
required this.readingInfo,
|
||||
required this.senses,
|
||||
required this.jlptLevel,
|
||||
required this.sources,
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
'_score': score,
|
||||
'entryId': entryId,
|
||||
'isCommon': isCommon,
|
||||
'japanese': japanese.map((e) => e.toJson()).toList(),
|
||||
'kanjiInfo':
|
||||
kanjiInfo.map((key, value) => MapEntry(key, value.toJson())),
|
||||
'readingInfo':
|
||||
readingInfo.map((key, value) => MapEntry(key, value.toJson())),
|
||||
'senses': senses.map((e) => e.toJson()).toList(),
|
||||
'jlptLevel': jlptLevel.toJson(),
|
||||
'sources': sources.toJson(),
|
||||
};
|
||||
|
||||
factory WordSearchResult.fromJson(Map<String, dynamic> json) =>
|
||||
WordSearchResult(
|
||||
score: json['_score'] as int,
|
||||
entryId: json['entryId'] as int,
|
||||
isCommon: json['isCommon'] as bool,
|
||||
japanese: (json['japanese'] as List<dynamic>)
|
||||
.map((e) => WordSearchRuby.fromJson(e))
|
||||
.toList(),
|
||||
@@ -59,6 +77,24 @@ class WordSearchResult {
|
||||
senses: (json['senses'] as List<dynamic>)
|
||||
.map((e) => WordSearchSense.fromJson(e))
|
||||
.toList(),
|
||||
jlptLevel: JlptLevel.fromJson(json['jlptLevel'] as Object?),
|
||||
sources: WordSearchSources.fromJson(json['sources']),
|
||||
);
|
||||
|
||||
String _formatJapaneseWord(WordSearchRuby word) =>
|
||||
word.furigana == null ? word.base : "${word.base} (${word.furigana})";
|
||||
|
||||
@override
|
||||
String toString() {
|
||||
final japaneseWord = _formatJapaneseWord(japanese[0]);
|
||||
final isCommonString = isCommon ? '(C)' : '';
|
||||
final jlptLevelString = "(${jlptLevel.toString()})";
|
||||
|
||||
return '''
|
||||
${score} | [$entryId] $japaneseWord $isCommonString $jlptLevelString
|
||||
Other forms: ${japanese.skip(1).map(_formatJapaneseWord).join(', ')}
|
||||
Senses: ${senses.map((s) => s.englishDefinitions).join(', ')}
|
||||
'''
|
||||
.trim();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import 'package:jadb/models/jmdict/jmdict_dialect.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_field.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_misc.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_pos.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
|
||||
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
|
||||
|
||||
class WordSearchSense {
|
||||
@@ -38,7 +39,7 @@ class WordSearchSense {
|
||||
// TODO: there is a lot more info to collect in the languageSource data
|
||||
|
||||
/// Information about the the origin of the word, if loaned from another language.
|
||||
final List<String> languageSource;
|
||||
final List<WordSearchSenseLanguageSource> languageSource;
|
||||
|
||||
// TODO: add example sentences
|
||||
|
||||
@@ -106,6 +107,8 @@ class WordSearchSense {
|
||||
misc:
|
||||
(json['misc'] as List).map((e) => JMdictMisc.fromJson(e)).toList(),
|
||||
info: List<String>.from(json['info']),
|
||||
languageSource: List<String>.from(json['languageSource']),
|
||||
languageSource: (json['languageSource'] as List)
|
||||
.map((e) => WordSearchSenseLanguageSource.fromJson(e))
|
||||
.toList(),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
/// A reference to a foreign language where this sense originates from.
|
||||
class WordSearchSenseLanguageSource {
|
||||
final String language;
|
||||
final String? phrase;
|
||||
final bool fullyDescribesSense;
|
||||
final bool constructedFromSmallerWords;
|
||||
|
||||
const WordSearchSenseLanguageSource({
|
||||
required this.language,
|
||||
this.phrase,
|
||||
this.fullyDescribesSense = true,
|
||||
this.constructedFromSmallerWords = false,
|
||||
});
|
||||
|
||||
Map<String, Object?> toJson() => {
|
||||
'language': language,
|
||||
'phrase': phrase,
|
||||
'fullyDescribesSense': fullyDescribesSense,
|
||||
'constructedFromSmallerWords': constructedFromSmallerWords,
|
||||
};
|
||||
|
||||
factory WordSearchSenseLanguageSource.fromJson(Map<String, dynamic> json) =>
|
||||
WordSearchSenseLanguageSource(
|
||||
language: json['language'],
|
||||
phrase: json['phrase'],
|
||||
fullyDescribesSense: json['fullyDescribesSense'] ?? true,
|
||||
constructedFromSmallerWords:
|
||||
json['constructedFromSmallerWords'] ?? false,
|
||||
);
|
||||
}
|
||||
@@ -3,6 +3,12 @@ class WordSearchXrefEntry {
|
||||
/// The ID of the entry that this entry cross-references to.
|
||||
final int entryId;
|
||||
|
||||
/// The base word of the cross-referenced entry.
|
||||
final String baseWord;
|
||||
|
||||
/// The furigana of the cross-referenced entry, if any.
|
||||
final String? furigana;
|
||||
|
||||
/// Whether the entryId was ambiguous during the creation of the
|
||||
/// database (and hence might be incorrect).
|
||||
final bool ambiguous;
|
||||
@@ -10,16 +16,22 @@ class WordSearchXrefEntry {
|
||||
const WordSearchXrefEntry({
|
||||
required this.entryId,
|
||||
required this.ambiguous,
|
||||
required this.baseWord,
|
||||
required this.furigana,
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
'entryId': entryId,
|
||||
'ambiguous': ambiguous,
|
||||
'baseWord': baseWord,
|
||||
'furigana': furigana,
|
||||
};
|
||||
|
||||
factory WordSearchXrefEntry.fromJson(Map<String, dynamic> json) =>
|
||||
WordSearchXrefEntry(
|
||||
entryId: json['entryId'] as int,
|
||||
ambiguous: json['ambiguous'] as bool,
|
||||
baseWord: json['baseWord'] as String,
|
||||
furigana: json['furigana'] as String?,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,25 +1,66 @@
|
||||
import 'package:jadb/models/verify_tables.dart';
|
||||
import 'package:jadb/models/word_search/word_search_result.dart';
|
||||
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
|
||||
import 'package:jadb/models/radkfile/radicals_search_result.dart';
|
||||
import 'package:jadb/search/word_search.dart';
|
||||
import 'package:jadb/search/filter_kanji.dart';
|
||||
import 'package:jadb/search/radical_search.dart';
|
||||
import 'package:jadb/search/word_search/word_search.dart';
|
||||
|
||||
import 'package:jadb/search/kanji_search.dart';
|
||||
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
class JaDBConnection {
|
||||
final DatabaseExecutor _connection;
|
||||
extension JaDBConnection on DatabaseExecutor {
|
||||
/// Ensure that the database contain all JaDB tables.
|
||||
///
|
||||
/// This will throw an exception if any of the tables are missing.
|
||||
Future<void> jadbVerifyTables() => verifyTablesWithDbConnection(this);
|
||||
|
||||
const JaDBConnection(this._connection);
|
||||
/// Search for a kanji in the database.
|
||||
Future<KanjiSearchResult?> jadbSearchKanji(String kanji) =>
|
||||
searchKanjiWithDbConnection(this, kanji);
|
||||
|
||||
Future<KanjiSearchResult?> searchKanji(String kanji) async =>
|
||||
searchKanjiWithDbConnection(this._connection, kanji);
|
||||
/// Filter a list of characters, and return the ones that are listed in the kanji dictionary.
|
||||
Future<List<String>> filterKanji(
|
||||
List<String> kanji, {
|
||||
bool deduplicate = false,
|
||||
}) =>
|
||||
filterKanjiWithDbConnection(this, kanji, deduplicate);
|
||||
|
||||
Future<RadicalsSearchResult> searchKanjiByRadicals(
|
||||
List<String> radicals) async {
|
||||
throw UnimplementedError();
|
||||
}
|
||||
/// Search for a word in the database.
|
||||
Future<List<WordSearchResult>?> jadbSearchWord(
|
||||
String word, {
|
||||
SearchMode searchMode = SearchMode.Auto,
|
||||
int page = 0,
|
||||
int pageSize = 10,
|
||||
}) =>
|
||||
searchWordWithDbConnection(
|
||||
this,
|
||||
word,
|
||||
searchMode,
|
||||
page,
|
||||
pageSize,
|
||||
);
|
||||
|
||||
Future<List<WordSearchResult>?> searchWord(String word) async =>
|
||||
searchWordWithDbConnection(this._connection, word);
|
||||
///
|
||||
Future<WordSearchResult?> jadbGetWordById(int id) =>
|
||||
getWordByIdWithDbConnection(this, id);
|
||||
|
||||
/// Search for a word in the database, and return the count of results.
|
||||
Future<int?> jadbSearchWordCount(
|
||||
String word, {
|
||||
SearchMode searchMode = SearchMode.Auto,
|
||||
}) =>
|
||||
searchWordCountWithDbConnection(this, word, searchMode);
|
||||
|
||||
/// Given a list of radicals, search which kanji contains all
|
||||
/// of the radicals, find their other radicals, and return those.
|
||||
/// This is used to figure out which remaining combinations of radicals
|
||||
/// the user can search for without getting zero results.
|
||||
Future<List<String>> jadbSearchRemainingRadicals(List<String> radicals) =>
|
||||
searchRemainingRadicalsWithDbConnection(this, radicals);
|
||||
|
||||
/// Given a list of radicals, search which kanji contains all
|
||||
/// of the radicals, and return those.
|
||||
Future<List<String>> jadbSearchKanjiByRadicals(List<String> radicals) =>
|
||||
searchKanjiByRadicalsWithDbConnection(this, radicals);
|
||||
}
|
||||
|
||||
23
lib/search/filter_kanji.dart
Normal file
23
lib/search/filter_kanji.dart
Normal file
@@ -0,0 +1,23 @@
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
Future<List<String>> filterKanjiWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
List<String> kanji,
|
||||
bool deduplicate,
|
||||
) async {
|
||||
final Set<String> filteredKanji = await connection.rawQuery(
|
||||
'''
|
||||
SELECT "literal"
|
||||
FROM "${KANJIDICTableNames.character}"
|
||||
WHERE "literal" IN (${kanji.map((_) => '?').join(',')})
|
||||
''',
|
||||
kanji,
|
||||
).then((value) => value.map((e) => e['literal'] as String).toSet());
|
||||
|
||||
if (deduplicate) {
|
||||
return filteredKanji.toList();
|
||||
} else {
|
||||
return kanji.where((k) => filteredKanji.contains(k)).toList();
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,9 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:jadb/models/kanji_search/kanji_search_radical.dart';
|
||||
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
|
||||
import 'package:jadb/util/romaji_transliteration.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||
@@ -7,94 +12,109 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||
) async {
|
||||
late final List<Map<String, Object?>> characters;
|
||||
final characters_query = connection.query(
|
||||
"KANJIDIC_Character",
|
||||
where: "KANJIDIC_Character.literal = ?",
|
||||
KANJIDICTableNames.character,
|
||||
where: "literal = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> codepoints;
|
||||
final codepoints_query = connection.query(
|
||||
"KANJIDIC_Codepoint",
|
||||
where: "KANJIDIC_Codepoint.kanji = ?",
|
||||
KANJIDICTableNames.codepoint,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> kunyomis;
|
||||
final kunyomis_query = connection.query(
|
||||
"KANJIDIC_Kunyomi",
|
||||
where: "KANJIDIC_Kunyomi.kanji = ?",
|
||||
KANJIDICTableNames.kunyomi,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
orderBy: "orderNum",
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> onyomis;
|
||||
final onyomis_query = connection.query(
|
||||
"KANJIDIC_Onyomi",
|
||||
where: "KANJIDIC_Onyomi.kanji = ?",
|
||||
KANJIDICTableNames.onyomi,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
orderBy: "orderNum",
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> meanings;
|
||||
final meanings_query = connection.query(
|
||||
"KANJIDIC_Meaning",
|
||||
where: "KANJIDIC_Meaning.kanji = ? AND KANJIDIC_Meaning.language = ?",
|
||||
KANJIDICTableNames.meaning,
|
||||
where: "kanji = ? AND language = ?",
|
||||
whereArgs: [kanji, 'eng'],
|
||||
orderBy: "orderNum",
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> nanoris;
|
||||
final nanoris_query = connection.query(
|
||||
"KANJIDIC_Nanori",
|
||||
where: "KANJIDIC_Nanori.kanji = ?",
|
||||
KANJIDICTableNames.nanori,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> dictionary_references;
|
||||
final dictionary_references_query = connection.query(
|
||||
"KANJIDIC_DictionaryReference",
|
||||
where: "KANJIDIC_DictionaryReference.kanji = ?",
|
||||
KANJIDICTableNames.dictionaryReference,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> query_codes;
|
||||
final query_codes_query = connection.query(
|
||||
"KANJIDIC_QueryCode",
|
||||
where: "KANJIDIC_QueryCode.kanji = ?",
|
||||
KANJIDICTableNames.queryCode,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> radicals;
|
||||
final radicals_query = connection.query(
|
||||
"KANJIDIC_Radical",
|
||||
where: "KANJIDIC_Radical.kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
final radicals_query = connection.rawQuery(
|
||||
'''
|
||||
SELECT DISTINCT
|
||||
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
|
||||
"names"
|
||||
FROM "${KANJIDICTableNames.radical}"
|
||||
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
|
||||
LEFT JOIN (
|
||||
SELECT "radicalId", group_concat("name") AS "names"
|
||||
FROM "${KANJIDICTableNames.radicalName}"
|
||||
GROUP BY "radicalId"
|
||||
) USING ("radicalId")
|
||||
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
|
||||
''',
|
||||
[kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> radical_names;
|
||||
final radical_names_query = connection.query(
|
||||
"KANJIDIC_RadicalName",
|
||||
where: "KANJIDIC_RadicalName.kanji = ?",
|
||||
late final List<Map<String, Object?>> parts;
|
||||
final parts_query = connection.query(
|
||||
RADKFILETableNames.radkfile,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> readings;
|
||||
final readings_query = connection.query(
|
||||
"KANJIDIC_Reading",
|
||||
where: "KANJIDIC_Reading.kanji = ?",
|
||||
KANJIDICTableNames.reading,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> stroke_miscounts;
|
||||
final stroke_miscounts_query = connection.query(
|
||||
"KANJIDIC_StrokeMiscount",
|
||||
where: "KANJIDIC_StrokeMiscount.kanji = ?",
|
||||
KANJIDICTableNames.strokeMiscount,
|
||||
where: "kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> variants;
|
||||
final variants_query = connection.query(
|
||||
"KANJIDIC_Variant",
|
||||
where: "KANJIDIC_Variant.kanji = ?",
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
// TODO: add variant data to result
|
||||
// late final List<Map<String, Object?>> variants;
|
||||
// final variants_query = connection.query(
|
||||
// KANJIDICTableNames.variant,
|
||||
// where: "kanji = ?",
|
||||
// whereArgs: [kanji],
|
||||
// );
|
||||
|
||||
// TODO: Search for kunyomi and onyomi usage of the characters
|
||||
// from JMDict. We'll need to fuzzy aquery JMDict_KanjiElement for mathces,
|
||||
@@ -116,26 +136,54 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||
dictionary_references_query.then((value) => dictionary_references = value),
|
||||
query_codes_query.then((value) => query_codes = value),
|
||||
radicals_query.then((value) => radicals = value),
|
||||
radical_names_query.then((value) => radical_names = value),
|
||||
parts_query.then((value) => parts = value),
|
||||
readings_query.then((value) => readings = value),
|
||||
stroke_miscounts_query.then((value) => stroke_miscounts = value),
|
||||
variants_query.then((value) => variants = value),
|
||||
// variants_query.then((value) => variants = value),
|
||||
});
|
||||
|
||||
final entry = characters.first;
|
||||
|
||||
final String? grade = {
|
||||
1: 'grade 1',
|
||||
2: 'grade 2',
|
||||
3: 'grade 3',
|
||||
4: 'grade 4',
|
||||
5: 'grade 5',
|
||||
6: 'grade 6',
|
||||
7: 'grade 7',
|
||||
8: 'grade 8',
|
||||
9: 'grade 9',
|
||||
10: 'grade 10',
|
||||
}[entry['grade'] as int?];
|
||||
assert(radicals.length <= 1, 'There should be at most one radical per kanji');
|
||||
final radical = radicals.isNotEmpty
|
||||
? KanjiSearchRadical(
|
||||
symbol: radicals.first['symbol'] as String,
|
||||
names: (radicals.first['names'] as String?)?.split(',') ?? [],
|
||||
// TODO: add radical form data
|
||||
forms: [],
|
||||
// TODO: add radical meaning data
|
||||
meanings: [],
|
||||
)
|
||||
: null;
|
||||
|
||||
final alternativeLanguageReadings = readings
|
||||
.groupListsBy(
|
||||
(item) => item['type'] as String,
|
||||
)
|
||||
.map(
|
||||
(key, value) => MapEntry(
|
||||
key,
|
||||
value.map((item) => item['reading'] as String).toList(),
|
||||
),
|
||||
);
|
||||
|
||||
// TODO: Add `SKIPMisclassification` to the entries
|
||||
final queryCodes = query_codes
|
||||
.groupListsBy(
|
||||
(item) => item['type'] as String,
|
||||
)
|
||||
.map(
|
||||
(key, value) => MapEntry(
|
||||
key,
|
||||
value.map((item) => item['code'] as String).toList(),
|
||||
),
|
||||
);
|
||||
|
||||
// TODO: Add `volume` and `page` to the entries
|
||||
final dictionaryReferences = {
|
||||
for (final entry in dictionary_references)
|
||||
entry['type'] as String: entry['ref'] as String,
|
||||
};
|
||||
|
||||
final String? jlptLevel = {
|
||||
5: 'N5',
|
||||
@@ -147,12 +195,27 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||
|
||||
return KanjiSearchResult(
|
||||
kanji: entry['literal']! as String,
|
||||
taughtIn: grade,
|
||||
taughtIn: entry['grade'] as int?,
|
||||
jlptLevel: jlptLevel,
|
||||
newspaperFrequencyRank: entry['frequency'] as int?,
|
||||
strokeCount: entry['strokeCount'] as int,
|
||||
meanings: meanings.map((item) => item['meaning'] as String).toList(),
|
||||
kunyomi: kunyomis.map((item) => item['yomi'] as String).toList(),
|
||||
onyomi: onyomis.map((item) => item['yomi'] as String).toList(),
|
||||
parts: parts.map((item) => item['radical'] as String).toList(),
|
||||
onyomi: onyomis
|
||||
.map((item) => item['yomi'] as String)
|
||||
.map(transliterateHiraganaToKatakana)
|
||||
.toList(),
|
||||
radical: radical,
|
||||
codepoints: {
|
||||
for (final codepoint in codepoints)
|
||||
codepoint['type'] as String: codepoint['codepoint'] as String,
|
||||
},
|
||||
nanori: nanoris.map((item) => item['nanori'] as String).toList(),
|
||||
alternativeLanguageReadings: alternativeLanguageReadings,
|
||||
strokeMiscounts:
|
||||
stroke_miscounts.map((item) => item['strokeCount'] as int).toList(),
|
||||
queryCodes: queryCodes,
|
||||
dictionaryReferences: dictionaryReferences,
|
||||
);
|
||||
}
|
||||
|
||||
55
lib/search/radical_search.dart
Normal file
55
lib/search/radical_search.dart
Normal file
@@ -0,0 +1,55 @@
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
// TODO: validate that the list of radicals all are valid radicals
|
||||
|
||||
Future<List<String>> searchRemainingRadicalsWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
List<String> radicals,
|
||||
) async {
|
||||
final queryResult = await connection.rawQuery(
|
||||
'''
|
||||
SELECT DISTINCT "radical"
|
||||
FROM "${RADKFILETableNames.radkfile}"
|
||||
WHERE "kanji" IN (
|
||||
SELECT "kanji"
|
||||
FROM "${RADKFILETableNames.radkfile}"
|
||||
WHERE "radical" IN (${List.filled(radicals.length, '?').join(',')})
|
||||
GROUP BY "kanji"
|
||||
HAVING COUNT(DISTINCT "radical") = ?
|
||||
)
|
||||
''',
|
||||
[
|
||||
...radicals,
|
||||
radicals.length,
|
||||
],
|
||||
);
|
||||
|
||||
final remainingRadicals =
|
||||
queryResult.map((row) => row['radical'] as String).toList();
|
||||
|
||||
return remainingRadicals;
|
||||
}
|
||||
|
||||
Future<List<String>> searchKanjiByRadicalsWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
List<String> radicals,
|
||||
) async {
|
||||
final queryResult = await connection.rawQuery(
|
||||
'''
|
||||
SELECT "kanji"
|
||||
FROM "${RADKFILETableNames.radkfile}"
|
||||
WHERE "radical" IN (${List.filled(radicals.length, '?').join(',')})
|
||||
GROUP BY "kanji"
|
||||
HAVING COUNT(DISTINCT "radical") = ?
|
||||
''',
|
||||
[
|
||||
...radicals,
|
||||
radicals.length,
|
||||
],
|
||||
);
|
||||
|
||||
final kanji = queryResult.map((row) => row['kanji'] as String).toList();
|
||||
|
||||
return kanji;
|
||||
}
|
||||
@@ -1,503 +0,0 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_field.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_misc.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_pos.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
|
||||
import 'package:jadb/models/word_search/word_search_result.dart';
|
||||
import 'package:jadb/models/word_search/word_search_ruby.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
|
||||
import 'package:jadb/util/sqlite_utils.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
// TODO: Support globs
|
||||
|
||||
// TODO: Support tags
|
||||
|
||||
// TODO: Prefer original kana type when sorting results
|
||||
|
||||
// TODO: Support mixing kana and romaji
|
||||
|
||||
Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
String word, {
|
||||
bool isKana = true,
|
||||
}) async {
|
||||
if (word.isEmpty) {
|
||||
return null;
|
||||
}
|
||||
|
||||
late final List<int> entryIds;
|
||||
if (isKana) {
|
||||
entryIds = (await connection.query(
|
||||
'JMdict_EntryByKana',
|
||||
where: 'kana LIKE ?',
|
||||
whereArgs: ['$word%'],
|
||||
))
|
||||
.map((row) => row['entryId'] as int)
|
||||
.toList();
|
||||
} else {
|
||||
entryIds = (await connection.query(
|
||||
'JMdict_EntryByEnglish',
|
||||
where: 'english LIKE ?',
|
||||
whereArgs: ['$word%'],
|
||||
))
|
||||
.map((row) => row['entryId'] as int)
|
||||
.toList();
|
||||
}
|
||||
|
||||
if (entryIds.isEmpty) {
|
||||
return [];
|
||||
}
|
||||
|
||||
late final List<Map<String, Object?>> senses;
|
||||
final Future<List<Map<String, Object?>>> senses_query = connection.query(
|
||||
'JMdict_Sense',
|
||||
where: 'entryId IN (${entryIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> readingElements;
|
||||
final Future<List<Map<String, Object?>>> readingElements_query =
|
||||
connection.query(
|
||||
'JMdict_ReadingElement',
|
||||
where: 'entryId IN (${entryIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> kanjiElements;
|
||||
final Future<List<Map<String, Object?>>> kanjiElements_query =
|
||||
connection.query(
|
||||
'JMdict_KanjiElement',
|
||||
where: 'entryId IN (${entryIds.join(',')})',
|
||||
);
|
||||
|
||||
await Future.wait([
|
||||
senses_query.then((value) => senses = value),
|
||||
readingElements_query.then((value) => readingElements = value),
|
||||
kanjiElements_query.then((value) => kanjiElements = value),
|
||||
]);
|
||||
|
||||
// Sense queries
|
||||
|
||||
final senseIds = senses.map((element) => element['id'] as int).toList();
|
||||
|
||||
late final List<Map<String, Object?>> senseAntonyms;
|
||||
final Future<List<Map<String, Object?>>> senseAntonyms_query =
|
||||
connection.query(
|
||||
'JMdict_SenseAntonym',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseDialects;
|
||||
final Future<List<Map<String, Object?>>> senseDialects_query =
|
||||
connection.query(
|
||||
'JMdict_SenseDialect',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseFields;
|
||||
final Future<List<Map<String, Object?>>> senseFields_query = connection.query(
|
||||
'JMdict_SenseField',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseGlossaries;
|
||||
final Future<List<Map<String, Object?>>> senseGlossaries_query =
|
||||
connection.query(
|
||||
'JMdict_SenseGlossary',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseInfos;
|
||||
final Future<List<Map<String, Object?>>> senseInfos_query = connection.query(
|
||||
'JMdict_SenseInfo',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseLanguageSources;
|
||||
final Future<List<Map<String, Object?>>> senseLanguageSources_query =
|
||||
connection.query(
|
||||
'JMdict_SenseLanguageSource',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseMiscs;
|
||||
final Future<List<Map<String, Object?>>> senseMiscs_query = connection.query(
|
||||
'JMdict_SenseMisc',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> sensePOSs;
|
||||
final Future<List<Map<String, Object?>>> sensePOSs_query = connection.query(
|
||||
'JMdict_SensePOS',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseRestrictedToKanjis;
|
||||
final Future<List<Map<String, Object?>>> senseRestrictedToKanjis_query =
|
||||
connection.query(
|
||||
'JMdict_SenseRestrictedToKanji',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseRestrictedToReadings;
|
||||
final Future<List<Map<String, Object?>>> senseRestrictedToReadings_query =
|
||||
connection.query(
|
||||
'JMdict_SenseRestrictedToReading',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseSeeAlsos;
|
||||
final Future<List<Map<String, Object?>>> senseSeeAlsos_query =
|
||||
connection.query(
|
||||
'JMdict_SenseSeeAlso',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> exampleSentences;
|
||||
final Future<List<Map<String, Object?>>> exampleSentences_query =
|
||||
connection.query(
|
||||
'JMdict_ExampleSentence',
|
||||
where: 'senseId IN (${senseIds.join(',')})',
|
||||
);
|
||||
|
||||
// Reading queries
|
||||
|
||||
final readingIds = readingElements
|
||||
.map((element) => (
|
||||
element['entryId'] as int,
|
||||
escapeStringValue(element['reading'] as String)
|
||||
))
|
||||
.toList();
|
||||
|
||||
late final List<Map<String, Object?>> readingElementInfos;
|
||||
final Future<List<Map<String, Object?>>> readingElementInfos_query =
|
||||
connection.query(
|
||||
'JMdict_ReadingElementInfo',
|
||||
where: '(entryId, reading) IN (${readingIds.join(',')})',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> readingElementRestrictions;
|
||||
final Future<List<Map<String, Object?>>> readingElementRestrictions_query =
|
||||
connection.query(
|
||||
'JMdict_ReadingElementRestriction',
|
||||
where: '(entryId, reading) IN (${readingIds.join(',')})',
|
||||
);
|
||||
|
||||
// Kanji queries
|
||||
|
||||
final kanjiIds = kanjiElements
|
||||
.map((element) => (
|
||||
element['entryId'] as int,
|
||||
escapeStringValue(element['reading'] as String)
|
||||
))
|
||||
.toList();
|
||||
|
||||
late final List<Map<String, Object?>> kanjiElementInfos;
|
||||
final Future<List<Map<String, Object?>>> kanjiElementInfos_query =
|
||||
connection.query(
|
||||
'JMdict_KanjiElementInfo',
|
||||
where: '(entryId, reading) IN (${kanjiIds.join(',')})',
|
||||
);
|
||||
|
||||
await Future.wait([
|
||||
senseAntonyms_query.then((value) => senseAntonyms = value),
|
||||
senseDialects_query.then((value) => senseDialects = value),
|
||||
senseFields_query.then((value) => senseFields = value),
|
||||
senseGlossaries_query.then((value) => senseGlossaries = value),
|
||||
senseInfos_query.then((value) => senseInfos = value),
|
||||
senseLanguageSources_query.then((value) => senseLanguageSources = value),
|
||||
senseMiscs_query.then((value) => senseMiscs = value),
|
||||
sensePOSs_query.then((value) => sensePOSs = value),
|
||||
senseRestrictedToKanjis_query
|
||||
.then((value) => senseRestrictedToKanjis = value),
|
||||
senseRestrictedToReadings_query
|
||||
.then((value) => senseRestrictedToReadings = value),
|
||||
senseSeeAlsos_query.then((value) => senseSeeAlsos = value),
|
||||
exampleSentences_query.then((value) => exampleSentences = value),
|
||||
readingElementInfos_query.then((value) => readingElementInfos = value),
|
||||
readingElementRestrictions_query
|
||||
.then((value) => readingElementRestrictions = value),
|
||||
kanjiElementInfos_query.then((value) => kanjiElementInfos = value),
|
||||
]);
|
||||
|
||||
return _regroupWordSearchResults(
|
||||
entryIds: entryIds,
|
||||
readingElements: readingElements,
|
||||
kanjiElements: kanjiElements,
|
||||
senses: senses,
|
||||
senseAntonyms: senseAntonyms,
|
||||
senseDialects: senseDialects,
|
||||
senseFields: senseFields,
|
||||
senseGlossaries: senseGlossaries,
|
||||
senseInfos: senseInfos,
|
||||
senseLanguageSources: senseLanguageSources,
|
||||
senseMiscs: senseMiscs,
|
||||
sensePOSs: sensePOSs,
|
||||
senseRestrictedToKanjis: senseRestrictedToKanjis,
|
||||
senseRestrictedToReadings: senseRestrictedToReadings,
|
||||
senseSeeAlsos: senseSeeAlsos,
|
||||
exampleSentences: exampleSentences,
|
||||
readingElementInfos: readingElementInfos,
|
||||
readingElementRestrictions: readingElementRestrictions,
|
||||
kanjiElementInfos: kanjiElementInfos,
|
||||
);
|
||||
}
|
||||
|
||||
List<WordSearchResult> _regroupWordSearchResults({
|
||||
required List<int> entryIds,
|
||||
required List<Map<String, Object?>> readingElements,
|
||||
required List<Map<String, Object?>> kanjiElements,
|
||||
required List<Map<String, Object?>> senses,
|
||||
required List<Map<String, Object?>> senseAntonyms,
|
||||
required List<Map<String, Object?>> senseDialects,
|
||||
required List<Map<String, Object?>> senseFields,
|
||||
required List<Map<String, Object?>> senseGlossaries,
|
||||
required List<Map<String, Object?>> senseInfos,
|
||||
required List<Map<String, Object?>> senseLanguageSources,
|
||||
required List<Map<String, Object?>> senseMiscs,
|
||||
required List<Map<String, Object?>> sensePOSs,
|
||||
required List<Map<String, Object?>> senseRestrictedToKanjis,
|
||||
required List<Map<String, Object?>> senseRestrictedToReadings,
|
||||
required List<Map<String, Object?>> senseSeeAlsos,
|
||||
required List<Map<String, Object?>> exampleSentences,
|
||||
required List<Map<String, Object?>> readingElementInfos,
|
||||
required List<Map<String, Object?>> readingElementRestrictions,
|
||||
required List<Map<String, Object?>> kanjiElementInfos,
|
||||
}) {
|
||||
final List<WordSearchResult> results = [];
|
||||
|
||||
for (final entryId in entryIds) {
|
||||
final List<Map<String, Object?>> entryReadingElements = readingElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryKanjiElements = kanjiElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entrySenses =
|
||||
senses.where((element) => element['entryId'] == entryId).toList();
|
||||
|
||||
final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
|
||||
entryId: entryId,
|
||||
readingElements: entryReadingElements,
|
||||
kanjiElements: entryKanjiElements,
|
||||
readingElementInfos: readingElementInfos,
|
||||
readingElementRestrictions: readingElementRestrictions,
|
||||
kanjiElementInfos: kanjiElementInfos,
|
||||
);
|
||||
|
||||
final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
|
||||
senses: entrySenses,
|
||||
senseAntonyms: senseAntonyms,
|
||||
senseDialects: senseDialects,
|
||||
senseFields: senseFields,
|
||||
senseGlossaries: senseGlossaries,
|
||||
senseInfos: senseInfos,
|
||||
senseLanguageSources: senseLanguageSources,
|
||||
senseMiscs: senseMiscs,
|
||||
sensePOSs: sensePOSs,
|
||||
senseRestrictedToKanjis: senseRestrictedToKanjis,
|
||||
senseRestrictedToReadings: senseRestrictedToReadings,
|
||||
senseSeeAlsos: senseSeeAlsos,
|
||||
exampleSentences: exampleSentences,
|
||||
);
|
||||
|
||||
results.add(
|
||||
WordSearchResult(
|
||||
entryId: entryId,
|
||||
japanese: entryReadingElementsGrouped.rubys,
|
||||
kanjiInfo: entryReadingElementsGrouped.kanjiInfos,
|
||||
readingInfo: entryReadingElementsGrouped.readingInfos,
|
||||
senses: entrySensesGrouped,
|
||||
sources: const WordSearchSources(
|
||||
jmdict: true,
|
||||
jmnedict: false,
|
||||
),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/// The per-entry output of [_regroup_words]: the displayable ruby pairs for
/// an entry plus lookup tables for reading/kanji element info tags.
class GroupedWordResult {
  // Ruby (base text + optional furigana) pairs for the entry's spellings.
  final List<WordSearchRuby> rubys;

  // Maps a reading string to its info tag (decoded via
  // JMdictReadingInfo.fromId in [_regroup_words]).
  final Map<String, JMdictReadingInfo> readingInfos;

  // Maps a kanji string to its info tag (decoded via
  // JMdictKanjiInfo.fromId in [_regroup_words]).
  final Map<String, JMdictKanjiInfo> kanjiInfos;

  const GroupedWordResult({
    required this.rubys,
    required this.readingInfos,
    required this.kanjiInfos,
  });
}
|
||||
|
||||
/// Builds the ruby (base/furigana) pairs for entry [entryId] by pairing each
/// applicable reading element with each kanji element, honoring reading
/// restrictions, and collects the reading/kanji info lookup tables.
///
/// NOTE(review): an entry with kanji-less readings whose `doesNotMatchKanji`
/// flag is 0 and no kanji elements produces no rubys at all — presumably
/// kana-only entries always set the flag; confirm against the schema.
GroupedWordResult _regroup_words({
  required int entryId,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> kanjiElementInfos,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
}) {
  final rubys = <WordSearchRuby>[];

  // Keep only rows belonging to this entry. The info lists are used as-is
  // below; presumably they are already scoped appropriately — TODO confirm.
  bool belongsToEntry(Map<String, Object?> row) => row['entryId'] == entryId;
  final entryKanji = kanjiElements.where(belongsToEntry).toList();
  final entryReadings = readingElements.where(belongsToEntry).toList();
  final entryRestrictions =
      readingElementRestrictions.where(belongsToEntry).toList();

  for (final readingRow in entryReadings) {
    // Readings flagged as not matching any kanji are handled separately
    // in the loop after this one.
    if (readingRow['doesNotMatchKanji'] == 1) continue;

    final reading = readingRow['reading'] as String;

    // Restrictions limiting this reading to specific kanji spellings.
    final applicable = entryRestrictions
        .where((row) => row['reading'] == reading)
        .toList();

    for (final kanjiRow in entryKanji) {
      final kanji = kanjiRow['reading'] as String;

      // If the reading is restricted, pair it only with the listed kanji.
      final excluded = applicable.isNotEmpty &&
          !applicable.any((row) => row['restriction'] == kanji);
      if (excluded) continue;

      rubys.add(WordSearchRuby(base: kanji, furigana: reading));
    }
  }

  // Kana-only readings become rubys with no furigana overlay.
  for (final readingRow
      in entryReadings.where((row) => row['doesNotMatchKanji'] == 1)) {
    rubys.add(WordSearchRuby(base: readingRow['reading'] as String));
  }

  return GroupedWordResult(
    rubys: rubys,
    readingInfos: {
      for (final row in readingElementInfos)
        row['reading'] as String:
            JMdictReadingInfo.fromId(row['info'] as String),
    },
    kanjiInfos: {
      for (final row in kanjiElementInfos)
        row['reading'] as String:
            JMdictKanjiInfo.fromId(row['info'] as String),
    },
  );
}
|
||||
|
||||
/// Converts flat per-sense SQL rows into [WordSearchSense] objects, one per
/// row of [senses], preserving their order.
///
/// All auxiliary row lists are bucketed by `senseId` up front so the
/// per-sense assembly is a plain map lookup.
List<WordSearchSense> _regroup_senses({
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
}) {
  // NOTE(review): exampleSentences is accepted but never read here —
  // presumably sentences were meant to be attached to senses; confirm
  // before removing the parameter.

  Map<int, List<Map<String, Object?>>> bucketBySenseId(
          List<Map<String, Object?>> rows) =>
      rows.groupListsBy((row) => row['senseId'] as int);

  final antonymRows = bucketBySenseId(senseAntonyms);
  final dialectRows = bucketBySenseId(senseDialects);
  final fieldRows = bucketBySenseId(senseFields);
  final glossaryRows = bucketBySenseId(senseGlossaries);
  final infoRows = bucketBySenseId(senseInfos);
  final languageSourceRows = bucketBySenseId(senseLanguageSources);
  final miscRows = bucketBySenseId(senseMiscs);
  final posRows = bucketBySenseId(sensePOSs);
  final kanjiRestrictionRows = bucketBySenseId(senseRestrictedToKanjis);
  final readingRestrictionRows = bucketBySenseId(senseRestrictedToReadings);
  final seeAlsoRows = bucketBySenseId(senseSeeAlsos);

  // See-also and antonym cross-reference rows share the same shape.
  WordSearchXrefEntry xrefFromRow(Map<String, Object?> row) =>
      WordSearchXrefEntry(
        entryId: row['xrefEntryId'] as int,
        ambiguous: row['ambiguous'] == 1,
      );

  return senses.map((sense) {
    // NOTE(review): keyed on 'id' here, while data_query.dart reads
    // sense['senseId'] — confirm which column name the sense table uses.
    final senseId = sense['id'] as int;

    return WordSearchSense(
      englishDefinitions: (glossaryRows[senseId] ?? [])
          .map((row) => row['phrase'] as String)
          .toList(),
      partsOfSpeech: (posRows[senseId] ?? [])
          .map((row) => JMdictPOS.fromId(row['pos'] as String))
          .toList(),
      seeAlso: (seeAlsoRows[senseId] ?? []).map(xrefFromRow).toList(),
      antonyms: (antonymRows[senseId] ?? []).map(xrefFromRow).toList(),
      restrictedToReading: (readingRestrictionRows[senseId] ?? [])
          .map((row) => row['reading'] as String)
          .toList(),
      restrictedToKanji: (kanjiRestrictionRows[senseId] ?? [])
          .map((row) => row['kanji'] as String)
          .toList(),
      fields: (fieldRows[senseId] ?? [])
          .map((row) => JMdictField.fromId(row['field'] as String))
          .toList(),
      dialects: (dialectRows[senseId] ?? [])
          .map((row) => JMdictDialect.fromId(row['dialect'] as String))
          .toList(),
      misc: (miscRows[senseId] ?? [])
          .map((row) => JMdictMisc.fromId(row['misc'] as String))
          .toList(),
      info: (infoRows[senseId] ?? [])
          .map((row) => row['info'] as String)
          .toList(),
      languageSource: (languageSourceRows[senseId] ?? [])
          .map((row) => row['language'] as String)
          .toList(),
    );
  }).toList();
}
|
||||
309
lib/search/word_search/data_query.dart
Normal file
309
lib/search/word_search/data_query.dart
Normal file
@@ -0,0 +1,309 @@
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:jadb/table_names/tanos_jlpt.dart';
|
||||
import 'package:jadb/util/sqlite_utils.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
/// A flat bundle of raw SQL rows — one list per table — holding everything
/// [fetchLinearWordQueryData] loads for a batch of entries.
///
/// Rows are kept as the untyped maps sqflite returns; regrouping into model
/// objects happens in a later step.
class LinearWordQueryData {
  // Per-entry tables.
  final List<Map<String, Object?>> senses;
  final List<Map<String, Object?>> readingElements;
  final List<Map<String, Object?>> kanjiElements;
  final List<Map<String, Object?>> jlptTags;
  final List<Map<String, Object?>> commonEntries;

  // Per-sense detail tables.
  final List<Map<String, Object?>> senseAntonyms;
  final List<Map<String, Object?>> senseDialects;
  final List<Map<String, Object?>> senseFields;
  final List<Map<String, Object?>> senseGlossaries;
  final List<Map<String, Object?>> senseInfos;
  final List<Map<String, Object?>> senseLanguageSources;
  final List<Map<String, Object?>> senseMiscs;
  final List<Map<String, Object?>> sensePOSs;
  final List<Map<String, Object?>> senseRestrictedToKanjis;
  final List<Map<String, Object?>> senseRestrictedToReadings;
  final List<Map<String, Object?>> senseSeeAlsos;
  final List<Map<String, Object?>> exampleSentences;

  // Per-element detail tables.
  final List<Map<String, Object?>> readingElementInfos;
  final List<Map<String, Object?>> readingElementRestrictions;
  final List<Map<String, Object?>> kanjiElementInfos;

  const LinearWordQueryData({
    required this.senses,
    required this.readingElements,
    required this.kanjiElements,
    required this.jlptTags,
    required this.commonEntries,
    required this.senseAntonyms,
    required this.senseDialects,
    required this.senseFields,
    required this.senseGlossaries,
    required this.senseInfos,
    required this.senseLanguageSources,
    required this.senseMiscs,
    required this.sensePOSs,
    required this.senseRestrictedToKanjis,
    required this.senseRestrictedToReadings,
    required this.senseSeeAlsos,
    required this.exampleSentences,
    required this.readingElementInfos,
    required this.readingElementRestrictions,
    required this.kanjiElementInfos,
  });
}
|
||||
|
||||
/// Fetches, as one batched operation, every table row needed to build full
/// word-search results for [entryIds].
///
/// Runs in two phases. Phase 1 loads the per-entry tables (senses, reading
/// elements, kanji elements, JLPT tags, common-entry flags); phase 2 uses
/// the sense/element ids collected in phase 1 to load the per-sense and
/// per-element detail tables. Within each phase all queries start eagerly
/// and are awaited together via [Future.wait].
///
/// NOTE(review): when [entryIds] is empty this builds `entryId IN ()`,
/// which SQLite accepts (always false) but most other SQL engines reject.
Future<LinearWordQueryData> fetchLinearWordQueryData(
  DatabaseExecutor connection,
  List<int> entryIds,
) async {
  // Phase 1: per-entry tables. Each `late final` local is assigned when the
  // shared Future.wait below completes.
  late final List<Map<String, Object?>> senses;
  final Future<List<Map<String, Object?>>> senses_query = connection.query(
    JMdictTableNames.sense,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );

  late final List<Map<String, Object?>> readingElements;
  final Future<List<Map<String, Object?>>> readingElements_query =
      connection.query(
    JMdictTableNames.readingElement,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
    // Keep the element order defined by the dictionary source.
    orderBy: 'orderNum',
  );

  late final List<Map<String, Object?>> kanjiElements;
  final Future<List<Map<String, Object?>>> kanjiElements_query =
      connection.query(
    JMdictTableNames.kanjiElement,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );

  late final List<Map<String, Object?>> jlptTags;
  final Future<List<Map<String, Object?>>> jlptTags_query = connection.query(
    TanosJLPTTableNames.jlptTag,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );

  late final List<Map<String, Object?>> commonEntries;
  final Future<List<Map<String, Object?>>> commonEntries_query =
      connection.query(
    'JMdict_EntryCommon',
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );

  await Future.wait([
    senses_query.then((value) => senses = value),
    readingElements_query.then((value) => readingElements = value),
    kanjiElements_query.then((value) => kanjiElements = value),
    jlptTags_query.then((value) => jlptTags = value),
    commonEntries_query.then((value) => commonEntries = value),
  ]);

  // Sense queries

  // NOTE(review): reads sense['senseId'] here, while the regrouping code
  // reads sense['id'] — confirm the sense table's actual id column name.
  final senseIds = senses.map((sense) => sense['senseId'] as int).toList();

  // Antonym xrefs, joined with the first base/furigana spelling of the
  // referenced entry so they can be displayed without further lookups.
  late final List<Map<String, Object?>> senseAntonyms;
  final Future<List<Map<String, Object?>>> senseAntonyms_query =
      connection.rawQuery(
    """
    SELECT
      "${JMdictTableNames.senseAntonyms}".senseId,
      "${JMdictTableNames.senseAntonyms}".ambiguous,
      "${JMdictTableNames.senseAntonyms}".xrefEntryId,
      "JMdict_BaseAndFurigana"."base",
      "JMdict_BaseAndFurigana"."furigana"
    FROM "${JMdictTableNames.senseAntonyms}"
    JOIN "JMdict_BaseAndFurigana"
      ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
    WHERE
      "senseId" IN (${List.filled(senseIds.length, '?').join(',')})
      AND "JMdict_BaseAndFurigana"."isFirst"
    ORDER BY
      "${JMdictTableNames.senseAntonyms}"."senseId",
      "${JMdictTableNames.senseAntonyms}"."xrefEntryId"
    """,
    [...senseIds],
  );

  late final List<Map<String, Object?>> senseDialects;
  final Future<List<Map<String, Object?>>> senseDialects_query =
      connection.query(
    JMdictTableNames.senseDialect,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> senseFields;
  final Future<List<Map<String, Object?>>> senseFields_query = connection.query(
    JMdictTableNames.senseField,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> senseGlossaries;
  final Future<List<Map<String, Object?>>> senseGlossaries_query =
      connection.query(
    JMdictTableNames.senseGlossary,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> senseInfos;
  final Future<List<Map<String, Object?>>> senseInfos_query = connection.query(
    JMdictTableNames.senseInfo,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> senseLanguageSources;
  final Future<List<Map<String, Object?>>> senseLanguageSources_query =
      connection.query(
    JMdictTableNames.senseLanguageSource,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> senseMiscs;
  final Future<List<Map<String, Object?>>> senseMiscs_query = connection.query(
    JMdictTableNames.senseMisc,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> sensePOSs;
  final Future<List<Map<String, Object?>>> sensePOSs_query = connection.query(
    JMdictTableNames.sensePOS,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> senseRestrictedToKanjis;
  final Future<List<Map<String, Object?>>> senseRestrictedToKanjis_query =
      connection.query(
    JMdictTableNames.senseRestrictedToKanji,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  late final List<Map<String, Object?>> senseRestrictedToReadings;
  final Future<List<Map<String, Object?>>> senseRestrictedToReadings_query =
      connection.query(
    JMdictTableNames.senseRestrictedToReading,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  // See-also xrefs, joined with the referenced entry's first spelling just
  // like the antonym query above.
  late final List<Map<String, Object?>> senseSeeAlsos;
  final Future<List<Map<String, Object?>>> senseSeeAlsos_query =
      connection.rawQuery(
    """
    SELECT
      "${JMdictTableNames.senseSeeAlso}"."senseId",
      "${JMdictTableNames.senseSeeAlso}"."ambiguous",
      "${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
      "JMdict_BaseAndFurigana"."base",
      "JMdict_BaseAndFurigana"."furigana"
    FROM "${JMdictTableNames.senseSeeAlso}"
    JOIN "JMdict_BaseAndFurigana"
      ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
    WHERE
      "senseId" IN (${List.filled(senseIds.length, '?').join(',')})
      AND "JMdict_BaseAndFurigana"."isFirst"
    ORDER BY
      "${JMdictTableNames.senseSeeAlso}"."senseId",
      "${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
    """,
    [...senseIds],
  );

  late final List<Map<String, Object?>> exampleSentences;
  final Future<List<Map<String, Object?>>> exampleSentences_query =
      connection.query(
    'JMdict_ExampleSentence',
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );

  // Reading queries

  final readingIds = readingElements
      .map((element) => element['elementId'] as int)
      .toList();

  late final List<Map<String, Object?>> readingElementInfos;
  final Future<List<Map<String, Object?>>> readingElementInfos_query =
      connection.query(
    JMdictTableNames.readingInfo,
    where: '(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
    whereArgs: readingIds,
  );

  late final List<Map<String, Object?>> readingElementRestrictions;
  final Future<List<Map<String, Object?>>> readingElementRestrictions_query =
      connection.query(
    JMdictTableNames.readingRestriction,
    where: '(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
    whereArgs: readingIds,
  );

  // Kanji queries

  final kanjiIds = kanjiElements
      .map((element) => element['elementId'] as int)
      .toList();

  late final List<Map<String, Object?>> kanjiElementInfos;
  final Future<List<Map<String, Object?>>> kanjiElementInfos_query =
      connection.query(
    JMdictTableNames.kanjiInfo,
    where: '(elementId) IN (${List.filled(kanjiIds.length, '?').join(',')})',
    whereArgs: kanjiIds,
  );

  // Phase 2: await every detail query together.
  await Future.wait([
    senseAntonyms_query.then((value) => senseAntonyms = value),
    senseDialects_query.then((value) => senseDialects = value),
    senseFields_query.then((value) => senseFields = value),
    senseGlossaries_query.then((value) => senseGlossaries = value),
    senseInfos_query.then((value) => senseInfos = value),
    senseLanguageSources_query.then((value) => senseLanguageSources = value),
    senseMiscs_query.then((value) => senseMiscs = value),
    sensePOSs_query.then((value) => sensePOSs = value),
    senseRestrictedToKanjis_query
        .then((value) => senseRestrictedToKanjis = value),
    senseRestrictedToReadings_query
        .then((value) => senseRestrictedToReadings = value),
    senseSeeAlsos_query.then((value) => senseSeeAlsos = value),
    exampleSentences_query.then((value) => exampleSentences = value),
    readingElementInfos_query.then((value) => readingElementInfos = value),
    readingElementRestrictions_query
        .then((value) => readingElementRestrictions = value),
    kanjiElementInfos_query.then((value) => kanjiElementInfos = value),
  ]);

  return LinearWordQueryData(
    senses: senses,
    readingElements: readingElements,
    kanjiElements: kanjiElements,
    jlptTags: jlptTags,
    commonEntries: commonEntries,
    senseAntonyms: senseAntonyms,
    senseDialects: senseDialects,
    senseFields: senseFields,
    senseGlossaries: senseGlossaries,
    senseInfos: senseInfos,
    senseLanguageSources: senseLanguageSources,
    senseMiscs: senseMiscs,
    sensePOSs: sensePOSs,
    senseRestrictedToKanjis: senseRestrictedToKanjis,
    senseRestrictedToReadings: senseRestrictedToReadings,
    senseSeeAlsos: senseSeeAlsos,
    exampleSentences: exampleSentences,
    readingElementInfos: readingElementInfos,
    readingElementRestrictions: readingElementRestrictions,
    kanjiElementInfos: kanjiElementInfos,
  );
}
|
||||
338
lib/search/word_search/entry_id_query.dart
Normal file
338
lib/search/word_search/entry_id_query.dart
Normal file
@@ -0,0 +1,338 @@
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:jadb/search/word_search/word_search.dart';
|
||||
import 'package:jadb/util/text_filtering.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
/// A dictionary entry id paired with its search relevance score, used to
/// order word-search results.
class ScoredEntryId {
  /// The JMdict entry id.
  final int entryId;

  /// The relevance score computed by the entry-id queries (higher is better).
  final int score;

  const ScoredEntryId(this.entryId, this.score);
}
|
||||
|
||||
/// Picks a concrete [SearchMode] for [word] based on the scripts it contains:
/// kanji (+ ASCII → mixed), ASCII → English, kana → kana.
SearchMode _determineSearchMode(String word) {
  final hasKanji = kanjiRegex.hasMatch(word);
  final hasAscii = RegExp(r'[A-Za-z]').hasMatch(word);

  if (hasKanji) {
    return hasAscii ? SearchMode.MixedKanji : SearchMode.Kanji;
  }
  if (hasAscii) {
    return SearchMode.English;
  }
  if (word.contains(hiraganaRegex) || word.contains(katakanaRegex)) {
    return SearchMode.Kana;
  }
  // NOTE(review): anything else (digits, punctuation, symbols, empty string)
  // falls through to MixedKana — confirm this is intentional.
  return SearchMode.MixedKana;
}
|
||||
|
||||
/// FTS reacts to certain characters (query operators and grouping syntax),
/// so we strip them from the user's input before building a MATCH term.
///
/// Removes: `.` `-` `*` `+` `(` `)` `^` `"`
String _filterFTSSensitiveCharacters(String word) {
  // A single character-class regex replaces the previous chain of eight
  // replaceAll calls; '-' is escaped so it is not parsed as a range.
  return word.replaceAll(RegExp(r'[.\-*+()^"]'), '');
}
|
||||
|
||||
/// Builds the SQL and bind arguments for a kanji/reading word search against
/// [tableName] (the kanji-element or reading-element table).
///
/// Two candidate sets are combined:
///  * `fts_results`     — FTS prefix matches, scored 100 (+10000 for an
///    exact match) plus the entry's base score;
///  * `non_fts_results` — plain LIKE substring matches not already found by
///    FTS, scored 50 plus the entry's base score.
///
/// With [countOnly] the statement returns one row with a "count" column;
/// otherwise it returns ("entryId", "score") rows ordered by score.
(String, List<Object?>) _kanjiReadingTemplate(
  String tableName,
  String word, {
  int pageSize = 10,
  bool countOnly = false,
}) {
  // Entry scores are stored per element type: 'k' for kanji, 'r' for reading.
  final scoreType = tableName == JMdictTableNames.kanjiElement ? 'k' : 'r';
  final limitClause = countOnly ? '' : 'LIMIT ?';

  final cte = '''
WITH
  fts_results AS (
    SELECT DISTINCT
      "${tableName}"."entryId",
      100
        + (("${tableName}FTS"."reading" = ?) * 10000)
        + "JMdict_EntryScore"."score"
        AS "score"
    FROM "${tableName}FTS"
    JOIN "${tableName}" USING ("elementId")
    JOIN "JMdict_EntryScore" USING ("elementId")
    WHERE "${tableName}FTS"."reading" MATCH ? || '*'
      AND "JMdict_EntryScore"."type" = '$scoreType'
    $limitClause
  ),
  non_fts_results AS (
    SELECT DISTINCT
      "${tableName}"."entryId",
      50
        + "JMdict_EntryScore"."score"
        AS "score"
    FROM "${tableName}"
    JOIN "JMdict_EntryScore" USING ("elementId")
    WHERE "reading" LIKE '%' || ? || '%'
      AND "${tableName}"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
      AND "JMdict_EntryScore"."type" = '$scoreType'
    $limitClause
  )
''';

  final String select;
  if (countOnly) {
    // BUG FIX: the previous count variant kept GROUP BY "entryId", which
    // yields one row per entry (each counting its duplicates); callers read
    // result.first['count'] and therefore got the first group's size instead
    // of the total number of matching entries.
    select = '''
SELECT COUNT(DISTINCT "entryId") AS count
FROM (
  SELECT * FROM fts_results
  UNION
  SELECT * FROM non_fts_results
)
''';
  } else {
    select = '''
SELECT "entryId", MAX("score") AS "score"
FROM (
  SELECT * FROM fts_results
  UNION
  SELECT * FROM non_fts_results
)
GROUP BY "entryId"
ORDER BY
  "score" DESC,
  "entryId" ASC
''';
  }

  final filtered = _filterFTSSensitiveCharacters(word);
  return (
    (cte + select).trim(),
    [
      filtered, // exact-match bonus comparison
      filtered, // FTS prefix MATCH term
      if (!countOnly) pageSize,
      filtered, // LIKE substring term
      if (!countOnly) pageSize,
    ],
  );
}
|
||||
|
||||
/// Runs the kanji-element search for [word] and returns scored entry ids.
///
/// NOTE(review): [offset] is accepted but never used — the shared template
/// has no OFFSET clause, so pagination beyond the first page presumably does
/// not work for kanji searches; confirm and wire it through.
Future<List<ScoredEntryId>> _queryKanji(
  DatabaseExecutor connection,
  String word,
  int pageSize,
  int? offset,
) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    pageSize: pageSize,
  );
  final rows = await connection.rawQuery(query, args);
  return [
    for (final row in rows)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
|
||||
|
||||
/// Counts how many entries the kanji-element search for [word] would match.
Future<int> _queryKanjiCount(
  DatabaseExecutor connection,
  String word,
) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(query, args);
  return rows.first['count'] as int;
}
|
||||
|
||||
/// Runs the reading-element (kana) search for [word] and returns scored
/// entry ids.
///
/// NOTE(review): [offset] is accepted but never used — the shared template
/// has no OFFSET clause, so pagination beyond the first page presumably does
/// not work for kana searches; confirm and wire it through.
Future<List<ScoredEntryId>> _queryKana(
  DatabaseExecutor connection,
  String word,
  int pageSize,
  int? offset,
) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    pageSize: pageSize,
  );
  final rows = await connection.rawQuery(query, args);
  return [
    for (final row in rows)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
|
||||
|
||||
/// Counts how many entries the reading-element (kana) search for [word]
/// would match.
Future<int> _queryKanaCount(
  DatabaseExecutor connection,
  String word,
) async {
  final (query, args) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(query, args);
  return rows.first['count'] as int;
}
|
||||
|
||||
/// Runs the English gloss search for [word] and returns scored entry ids.
///
/// Scoring: the entry's best base score, plus bonuses when a glossary phrase
/// equals the query exactly in the first sense (+50), the second sense
/// (+30), or any sense (+20).
Future<List<ScoredEntryId>> _queryEnglish(
  DatabaseExecutor connection,
  String word,
  int pageSize,
  int? offset,
) async {
  final result = await connection.rawQuery(
    '''
SELECT
  "${JMdictTableNames.sense}"."entryId",
  MAX("JMdict_EntryScore"."score")
    + (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
    + (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
    + (("${JMdictTableNames.senseGlossary}"."phrase" = ?) * 20)
    as "score"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
JOIN "JMdict_EntryScore" USING ("entryId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
GROUP BY "JMdict_EntryScore"."entryId"
ORDER BY
  "score" DESC,
  "${JMdictTableNames.sense}"."entryId" ASC
LIMIT ?
OFFSET ?
'''
        .trim(),
    [
      word,
      word,
      word,
      // '%' would act as an extra wildcard inside LIKE, so strip it.
      '%${word.replaceAll('%', '')}%',
      pageSize,
      // BUG FIX: a null offset was previously bound straight into
      // "OFFSET ?"; default to 0 so the statement always gets an integer.
      offset ?? 0,
    ],
  );

  return result
      .map((row) => ScoredEntryId(
            row['entryId'] as int,
            row['score'] as int,
          ))
      .toList();
}
|
||||
|
||||
/// Counts how many entries the English gloss search for [word] would match.
Future<int> _queryEnglishCount(
  DatabaseExecutor connection,
  String word,
) async {
  final result = await connection.rawQuery(
    '''
SELECT
  COUNT(DISTINCT "${JMdictTableNames.sense}"."entryId") AS "count"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
'''
        .trim(),
    [
      // CONSISTENCY FIX: _queryEnglish strips '%' from the query before
      // building its LIKE pattern, but this count query previously did not,
      // so the reported total could disagree with the returned results.
      '%${word.replaceAll('%', '')}%',
    ],
  );

  return result.first['count'] as int;
}
|
||||
|
||||
/// Resolves [searchMode] (expanding [SearchMode.Auto] via
/// [_determineSearchMode]) and fetches the scored entry ids for [word] with
/// the matching query strategy.
///
/// Throws [UnimplementedError] for the mixed search modes.
Future<List<ScoredEntryId>> fetchEntryIds(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
  int pageSize,
  int? offset,
) async {
  final mode = searchMode == SearchMode.Auto
      ? _determineSearchMode(word)
      : searchMode;

  assert(
    word.isNotEmpty,
    'Word should not be empty when fetching entry IDs',
  );

  return switch (mode) {
    SearchMode.Kanji => await _queryKanji(connection, word, pageSize, offset),
    SearchMode.Kana => await _queryKana(connection, word, pageSize, offset),
    SearchMode.English =>
      await _queryEnglish(connection, word, pageSize, offset),
    _ => throw UnimplementedError(
        'Search mode $mode is not implemented',
      ),
  };
}
|
||||
|
||||
/// Resolves [searchMode] (expanding [SearchMode.Auto] via
/// [_determineSearchMode]) and counts the total number of entries [word]
/// would match.
///
/// Throws [UnimplementedError] for the mixed search modes.
Future<int?> fetchEntryIdCount(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
) async {
  final mode = searchMode == SearchMode.Auto
      ? _determineSearchMode(word)
      : searchMode;

  assert(
    word.isNotEmpty,
    'Word should not be empty when fetching entry IDs',
  );

  return switch (mode) {
    SearchMode.Kanji => await _queryKanjiCount(connection, word),
    SearchMode.Kana => await _queryKanaCount(connection, word),
    SearchMode.English => await _queryEnglishCount(connection, word),
    _ => throw UnimplementedError(
        'Search mode $mode is not implemented',
      ),
  };
}
|
||||
319
lib/search/word_search/regrouping.dart
Normal file
319
lib/search/word_search/regrouping.dart
Normal file
@@ -0,0 +1,319 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/models/common/jlpt_level.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_field.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_misc.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_pos.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
|
||||
import 'package:jadb/models/word_search/word_search_result.dart';
|
||||
import 'package:jadb/models/word_search/word_search_ruby.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
|
||||
import 'package:jadb/search/word_search/entry_id_query.dart';
|
||||
|
||||
/// Regroups flat (row-per-fact) query result sets into structured
/// [WordSearchResult]s, one per entry in [entryIds] (input order preserved).
///
/// Every `List<Map<String, Object?>>` parameter is a linear result set whose
/// rows carry an `entryId` (or `senseId`) column used for the join.
List<WordSearchResult> regroupWordSearchResults({
  required List<ScoredEntryId> entryIds,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> jlptTags,
  required List<Map<String, Object?>> commonEntries,
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
  required List<Map<String, Object?>> kanjiElementInfos,
}) {
  // Index the per-entry row sets once up front instead of rescanning the full
  // lists for every entry (the previous pattern was O(entries * rows)).
  final readingElementsByEntry =
      readingElements.groupListsBy((row) => row['entryId'] as int);
  final kanjiElementsByEntry =
      kanjiElements.groupListsBy((row) => row['entryId'] as int);
  final jlptTagsByEntry =
      jlptTags.groupListsBy((row) => row['entryId'] as int);
  final sensesByEntry = senses.groupListsBy((row) => row['entryId'] as int);

  final commonEntryIds =
      commonEntries.map((entry) => entry['entryId'] as int).toSet();

  final List<WordSearchResult> results = [];

  for (final scoredEntryId in entryIds) {
    final entryId = scoredEntryId.entryId;

    final entryReadingElements = readingElementsByEntry[entryId] ?? const [];
    final entryKanjiElements = kanjiElementsByEntry[entryId] ?? const [];
    final entryJlptTags = jlptTagsByEntry[entryId] ?? const [];
    final entrySenses = sensesByEntry[entryId] ?? const [];

    // Highest JLPT tag attached to the entry wins; untagged entries fall
    // back to JlptLevel.none.
    final jlptLevel = entryJlptTags
            .map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
            .sorted((a, b) => b.compareTo(a))
            .firstOrNull ??
        JlptLevel.none;

    final isCommon = commonEntryIds.contains(entryId);

    final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
      entryId: entryId,
      readingElements: entryReadingElements,
      kanjiElements: entryKanjiElements,
      readingElementInfos: readingElementInfos,
      readingElementRestrictions: readingElementRestrictions,
      kanjiElementInfos: kanjiElementInfos,
    );

    final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
      senses: entrySenses,
      senseAntonyms: senseAntonyms,
      senseDialects: senseDialects,
      senseFields: senseFields,
      senseGlossaries: senseGlossaries,
      senseInfos: senseInfos,
      senseLanguageSources: senseLanguageSources,
      senseMiscs: senseMiscs,
      sensePOSs: sensePOSs,
      senseRestrictedToKanjis: senseRestrictedToKanjis,
      senseRestrictedToReadings: senseRestrictedToReadings,
      senseSeeAlsos: senseSeeAlsos,
      exampleSentences: exampleSentences,
    );

    results.add(
      WordSearchResult(
        score: scoredEntryId.score,
        entryId: entryId,
        isCommon: isCommon,
        japanese: entryReadingElementsGrouped.rubys,
        kanjiInfo: entryReadingElementsGrouped.kanjiInfos,
        readingInfo: entryReadingElementsGrouped.readingInfos,
        senses: entrySensesGrouped,
        jlptLevel: jlptLevel,
        // Results produced here always come from JMdict, never JMnedict.
        sources: const WordSearchSources(
          jmdict: true,
          jmnedict: false,
        ),
      ),
    );
  }

  return results;
}
|
||||
|
||||
/// Bundles the per-entry output of `_regroup_words`: the renderable ruby
/// pairs plus lookup maps from a written form to its JMdict annotation.
class GroupedWordResult {
  /// Kanji/reading (base + optional furigana) pairs to render for the entry.
  final List<WordSearchRuby> rubys;

  /// Reading text -> reading-level JMdict annotation for that reading.
  final Map<String, JMdictReadingInfo> readingInfos;

  /// Kanji text -> kanji-level JMdict annotation for that written form.
  final Map<String, JMdictKanjiInfo> kanjiInfos;

  const GroupedWordResult({
    required this.rubys,
    required this.readingInfos,
    required this.kanjiInfos,
  });
}
|
||||
|
||||
/// Builds the ruby (base/furigana) pairs and per-form info maps for one entry.
///
/// All list parameters may contain rows belonging to other entries; only
/// rows whose `entryId` column equals [entryId] are used.
GroupedWordResult _regroup_words({
  required int entryId,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> kanjiElementInfos,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
}) {
  final kanjiElements_ =
      kanjiElements.where((element) => element['entryId'] == entryId).toList();

  final readingElements_ = readingElements
      .where((element) => element['entryId'] == entryId)
      .toList();

  final readingElementRestrictions_ = readingElementRestrictions
      .where((element) => element['entryId'] == entryId)
      .toList();

  final List<WordSearchRuby> rubys = [];

  for (final readingElement in readingElements_) {
    final reading = readingElement['reading'] as String;

    // Readings flagged as not matching any kanji — and entries with no kanji
    // forms at all — are rendered stand-alone, without furigana.
    if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
      rubys.add(WordSearchRuby(base: reading));
      continue;
    }

    // Hoisted out of the kanji loop: the restriction set depends only on the
    // reading, not on the kanji it is paired with.
    final restrictions = readingElementRestrictions_
        .where((element) => element['reading'] == reading)
        .toList();

    for (final kanjiElement in kanjiElements_) {
      final kanji = kanjiElement['reading'] as String;

      // A restricted reading only pairs with its explicitly listed kanji.
      if (restrictions.isNotEmpty &&
          !restrictions.any((element) => element['restriction'] == kanji)) {
        continue;
      }

      rubys.add(WordSearchRuby(base: kanji, furigana: reading));
    }
  }

  assert(
    rubys.isNotEmpty,
    'No readings found for entryId: $entryId',
  );

  // elementId -> surface text, used below to key the info maps by text.
  final Map<int, String> readingElementIdsToReading = {
    for (final element in readingElements_)
      element['elementId'] as int: element['reading'] as String,
  };

  final Map<int, String> kanjiElementIdsToReading = {
    for (final element in kanjiElements_)
      element['elementId'] as int: element['reading'] as String,
  };

  final readingElementInfos_ = readingElementInfos
      .where((element) => element['entryId'] == entryId)
      .toList();

  final kanjiElementInfos_ = kanjiElementInfos
      .where((element) => element['entryId'] == entryId)
      .toList();

  return GroupedWordResult(
    rubys: rubys,
    readingInfos: {
      // `!` is safe: info rows are filtered to this entry, so their element
      // ids are present in the map built from this entry's reading elements.
      for (final rei in readingElementInfos_)
        readingElementIdsToReading[rei['elementId'] as int]!:
            JMdictReadingInfo.fromId(rei['info'] as String),
    },
    kanjiInfos: {
      for (final kei in kanjiElementInfos_)
        kanjiElementIdsToReading[kei['elementId'] as int]!:
            JMdictKanjiInfo.fromId(kei['info'] as String),
    },
  );
}
|
||||
|
||||
/// Regroups flat sense-related rows into one [WordSearchSense] per row of
/// [senses], joining the auxiliary row sets on `senseId`.
///
/// NOTE(review): [exampleSentences] is accepted for interface parity but is
/// not used by the current implementation — confirm whether it should be.
List<WordSearchSense> _regroup_senses({
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
}) {
  // Index each auxiliary row set by senseId once, instead of scanning the
  // full lists for every sense.
  final groupedSenseAntonyms =
      senseAntonyms.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseDialects =
      senseDialects.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseFields =
      senseFields.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseGlossaries =
      senseGlossaries.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseInfos =
      senseInfos.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseLanguageSources =
      senseLanguageSources.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseMiscs =
      senseMiscs.groupListsBy((element) => element['senseId'] as int);
  final groupedSensePOSs =
      sensePOSs.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseRestrictedToKanjis = senseRestrictedToKanjis
      .groupListsBy((element) => element['senseId'] as int);
  final groupedSenseRestrictedToReadings = senseRestrictedToReadings
      .groupListsBy((element) => element['senseId'] as int);
  final groupedSenseSeeAlsos =
      senseSeeAlsos.groupListsBy((element) => element['senseId'] as int);

  // Shared row -> cross-reference mapping, used for both seeAlso and
  // antonyms (the original duplicated this construction inline).
  WordSearchXrefEntry toXref(Map<String, Object?> e) => WordSearchXrefEntry(
        entryId: e['xrefEntryId'] as int,
        baseWord: e['base'] as String,
        furigana: e['furigana'] as String?,
        ambiguous: e['ambiguous'] == 1,
      );

  final List<WordSearchSense> result = [];
  for (final sense in senses) {
    final int senseId = sense['senseId'] as int;

    final antonyms = groupedSenseAntonyms[senseId] ?? [];
    final dialects = groupedSenseDialects[senseId] ?? [];
    final fields = groupedSenseFields[senseId] ?? [];
    final glossaries = groupedSenseGlossaries[senseId] ?? [];
    final infos = groupedSenseInfos[senseId] ?? [];
    final languageSources = groupedSenseLanguageSources[senseId] ?? [];
    final miscs = groupedSenseMiscs[senseId] ?? [];
    final pos = groupedSensePOSs[senseId] ?? [];
    final restrictedToKanjis = groupedSenseRestrictedToKanjis[senseId] ?? [];
    final restrictedToReadings =
        groupedSenseRestrictedToReadings[senseId] ?? [];
    final seeAlsos = groupedSenseSeeAlsos[senseId] ?? [];

    result.add(
      WordSearchSense(
        englishDefinitions:
            glossaries.map((e) => e['phrase'] as String).toList(),
        partsOfSpeech:
            pos.map((e) => JMdictPOS.fromId(e['pos'] as String)).toList(),
        seeAlso: seeAlsos.map(toXref).toList(),
        antonyms: antonyms.map(toXref).toList(),
        restrictedToReading:
            restrictedToReadings.map((e) => e['reading'] as String).toList(),
        restrictedToKanji:
            restrictedToKanjis.map((e) => e['kanji'] as String).toList(),
        fields: fields
            .map((e) => JMdictField.fromId(e['field'] as String))
            .toList(),
        dialects: dialects
            .map((e) => JMdictDialect.fromId(e['dialect'] as String))
            .toList(),
        misc: miscs.map((e) => JMdictMisc.fromId(e['misc'] as String)).toList(),
        info: infos.map((e) => e['info'] as String).toList(),
        languageSource: languageSources
            .map((e) => WordSearchSenseLanguageSource(
                  language: e['language'] as String,
                  phrase: e['phrase'] as String?,
                  fullyDescribesSense: e['fullyDescribesSense'] == 1,
                  constructedFromSmallerWords:
                      e['constructedFromSmallerWords'] == 1,
                ))
            .toList(),
      ),
    );
  }

  return result;
}
|
||||
154
lib/search/word_search/word_search.dart
Normal file
154
lib/search/word_search/word_search.dart
Normal file
@@ -0,0 +1,154 @@
|
||||
// TODO: Support globs
|
||||
|
||||
// TODO: Support tags
|
||||
|
||||
// TODO: Prefer original kana type when sorting results
|
||||
|
||||
// TODO: Support mixing kana and romaji
|
||||
//
|
||||
import 'package:jadb/models/word_search/word_search_result.dart';
|
||||
import 'package:jadb/search/word_search/data_query.dart';
|
||||
import 'package:jadb/search/word_search/entry_id_query.dart';
|
||||
import 'package:jadb/search/word_search/regrouping.dart';
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
/// How a search query string should be interpreted.
// NOTE(review): values use UpperCamelCase; Effective Dart prefers
// lowerCamelCase for enum values, but renaming them would break callers.
enum SearchMode {
  /// Detect the appropriate mode from the query itself.
  Auto,

  /// Match against English glossary text.
  English,

  /// Match against kanji writings.
  Kanji,

  /// Mixed kanji input — throws UnimplementedError in the count query.
  MixedKanji,

  /// Match against kana readings.
  Kana,

  /// Mixed kana input — throws UnimplementedError in the count query.
  MixedKana,
}
|
||||
|
||||
/// Runs a paginated dictionary word search on [connection].
///
/// Returns `null` for an empty [word], an empty list when nothing matched,
/// and otherwise up to [pageSize] regrouped results for the zero-based
/// [page].
Future<List<WordSearchResult>?> searchWordWithDbConnection(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
  int page,
  int pageSize,
) async {
  if (word.isEmpty) {
    return null;
  }

  // Offset of the first row belonging to the requested page.
  final offset = page * pageSize;

  final entryIds = await fetchEntryIds(
    connection,
    word,
    searchMode,
    pageSize,
    offset,
  );

  if (entryIds.isEmpty) {
    // TODO: try conjugation search
    return [];
  }

  final data = await fetchLinearWordQueryData(
    connection,
    [for (final scored in entryIds) scored.entryId],
  );

  // Turn the flat per-table row sets into one structured result per entry.
  return regroupWordSearchResults(
    entryIds: entryIds,
    readingElements: data.readingElements,
    kanjiElements: data.kanjiElements,
    jlptTags: data.jlptTags,
    commonEntries: data.commonEntries,
    senses: data.senses,
    senseAntonyms: data.senseAntonyms,
    senseDialects: data.senseDialects,
    senseFields: data.senseFields,
    senseGlossaries: data.senseGlossaries,
    senseInfos: data.senseInfos,
    senseLanguageSources: data.senseLanguageSources,
    senseMiscs: data.senseMiscs,
    sensePOSs: data.sensePOSs,
    senseRestrictedToKanjis: data.senseRestrictedToKanjis,
    senseRestrictedToReadings: data.senseRestrictedToReadings,
    senseSeeAlsos: data.senseSeeAlsos,
    exampleSentences: data.exampleSentences,
    readingElementInfos: data.readingElementInfos,
    readingElementRestrictions: data.readingElementRestrictions,
    kanjiElementInfos: data.kanjiElementInfos,
  );
}
|
||||
|
||||
/// Counts the dictionary entries matching [word] under [searchMode].
///
/// Returns `null` for an empty [word]; otherwise forwards the count from
/// [fetchEntryIdCount].
Future<int?> searchWordCountWithDbConnection(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
) async {
  if (word.isEmpty) {
    return null;
  }

  return fetchEntryIdCount(connection, word, searchMode);
}
|
||||
|
||||
/// Looks up a single dictionary entry by its [id].
///
/// Returns `null` for non-positive ids and for ids not present in the entry
/// table.
Future<WordSearchResult?> getWordByIdWithDbConnection(
  DatabaseExecutor connection,
  int id,
) async {
  if (id <= 0) {
    return null;
  }

  // Cheap existence probe before fetching the full linear data set.
  final existsRows = await connection.rawQuery(
    'SELECT EXISTS(SELECT 1 FROM "${JMdictTableNames.entry}" WHERE "entryId" = ?)',
    [id],
  );
  final exists = existsRows.isNotEmpty && existsRows.first.values.first == 1;
  if (!exists) {
    return null;
  }

  final data = await fetchLinearWordQueryData(
    connection,
    [id],
  );

  // Score 0: a direct id lookup has no search relevance to rank by.
  final result = regroupWordSearchResults(
    entryIds: [ScoredEntryId(id, 0)],
    readingElements: data.readingElements,
    kanjiElements: data.kanjiElements,
    jlptTags: data.jlptTags,
    commonEntries: data.commonEntries,
    senses: data.senses,
    senseAntonyms: data.senseAntonyms,
    senseDialects: data.senseDialects,
    senseFields: data.senseFields,
    senseGlossaries: data.senseGlossaries,
    senseInfos: data.senseInfos,
    senseLanguageSources: data.senseLanguageSources,
    senseMiscs: data.senseMiscs,
    sensePOSs: data.sensePOSs,
    senseRestrictedToKanjis: data.senseRestrictedToKanjis,
    senseRestrictedToReadings: data.senseRestrictedToReadings,
    senseSeeAlsos: data.senseSeeAlsos,
    exampleSentences: data.exampleSentences,
    readingElementInfos: data.readingElementInfos,
    readingElementRestrictions: data.readingElementRestrictions,
    kanjiElementInfos: data.kanjiElementInfos,
  );

  assert(
    result.length == 1,
    'Expected exactly one result for entryId $id, but got ${result.length}',
  );

  return result.firstOrNull;
}
|
||||
@@ -1,7 +1,5 @@
|
||||
abstract class JMdictTableNames {
|
||||
static const String entry = 'JMdict_Entry';
|
||||
static const String entryByKana = 'JMdict_EntryByKana';
|
||||
static const String entryByEnglish = 'JMdict_EntryByEnglish';
|
||||
static const String kanjiElement = 'JMdict_KanjiElement';
|
||||
static const String kanjiInfo = 'JMdict_KanjiElementInfo';
|
||||
static const String readingElement = 'JMdict_ReadingElement';
|
||||
@@ -23,8 +21,6 @@ abstract class JMdictTableNames {
|
||||
|
||||
static Set<String> get allTables => {
|
||||
entry,
|
||||
entryByKana,
|
||||
entryByEnglish,
|
||||
kanjiElement,
|
||||
kanjiInfo,
|
||||
readingElement,
|
||||
@@ -289,15 +289,16 @@ extension on DateTime {
|
||||
}
|
||||
|
||||
String get japaneseWeekdayPrefix => [
|
||||
'月',
|
||||
'火',
|
||||
'水',
|
||||
'木',
|
||||
'金',
|
||||
'土',
|
||||
'日',
|
||||
][weekday - 1];
|
||||
'月',
|
||||
'火',
|
||||
'水',
|
||||
'木',
|
||||
'金',
|
||||
'土',
|
||||
'日',
|
||||
][weekday - 1];
|
||||
|
||||
/// Returns the date in Japanese format.
|
||||
String japaneseDate({bool showWeekday = false}) => '$month月$day日' + (showWeekday ? '($japaneseWeekdayPrefix)' : '');
|
||||
String japaneseDate({bool showWeekday = false}) =>
|
||||
'$month月$day日' + (showWeekday ? '($japaneseWeekdayPrefix)' : '');
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
247
lib/util/lemmatizer/lemmatizer.dart
Normal file
247
lib/util/lemmatizer/lemmatizer.dart
Normal file
@@ -0,0 +1,247 @@
|
||||
import 'package:jadb/util/lemmatizer/rules.dart';
|
||||
|
||||
/// Grammatical word classes the lemmatizer distinguishes.
enum WordClass {
  noun,

  /// Ichidan ("ru") verbs.
  ichidanVerb,

  /// Godan ("u") verbs.
  godanVerb,

  /// Irregular verbs.
  irregularVerb,

  /// i-adjectives.
  iAdjective,

  /// na-adjectives.
  nAdjective,
  adverb,
  particle,

  /// Synthetic class assigned to the raw input word by `lemmatize`.
  input,
}
|
||||
|
||||
/// Whether a rule's pattern attaches to the start or the end of a word.
enum LemmatizationRuleType {
  /// The pattern matches at the beginning of the word.
  prefix,

  /// The pattern matches at the end of the word (typical for conjugation).
  suffix,
}
|
||||
|
||||
/// A single deconjugation rule: a pattern that recognizes one conjugated
/// affix and rewrites the word back towards its base form.
class LemmatizationRule {
  /// Human-readable rule name, used in [Lemmatized.toString] output.
  final String name;

  /// The affix pattern this rule recognizes and replaces.
  final AllomorphPattern pattern;

  /// The word class this rule produces.
  final WordClass wordClass;

  /// Word classes that rules applied after this one may belong to;
  /// `null` means any rule may follow.
  final List<WordClass>? validChildClasses;

  /// Whether lemmatization stops after this rule (base forms).
  final bool terminal;

  const LemmatizationRule({
    required this.name,
    required this.pattern,
    required this.wordClass,
    this.validChildClasses,
    this.terminal = false,
  });

  /// Whether [word] matches this rule's pattern.
  bool matches(String word) => pattern.matches(word);

  /// Applies this rule's pattern to [word]; returns the candidate rewrites,
  /// or `null` when the pattern does not apply.
  List<String>? apply(String word) => pattern.apply(word);

  /// Convenience constructor for a rule built from a single
  /// pattern/replacement pair.
  ///
  /// Fix: [validChildClasses], [terminal] and [lookAheadBehind] were
  /// previously untyped (implicitly `dynamic`); they now carry the types the
  /// main constructor and [AllomorphPattern] actually expect.
  LemmatizationRule.simple({
    required String name,
    required String pattern,
    required String? replacement,
    required WordClass wordClass,
    List<WordClass>? validChildClasses,
    bool terminal = false,
    List<Pattern> lookAheadBehind = const [''],
    LemmatizationRuleType type = LemmatizationRuleType.suffix,
  }) : this(
          name: name,
          pattern: AllomorphPattern(
            patterns: {
              pattern: replacement != null ? [replacement] : null
            },
            type: type,
            lookAheadBehind: lookAheadBehind,
          ),
          validChildClasses: validChildClasses,
          terminal: terminal,
          wordClass: wordClass,
        );
}
|
||||
|
||||
/// Represents a set of patterns for matching allomorphs in a word.
|
||||
/// The patterns can be either a prefix or a suffix, and they can include
|
||||
/// replacement characters for deconjugating into base forms.
|
||||
/// Represents a set of patterns for matching allomorphs in a word.
/// The patterns can be either a prefix or a suffix, and they can include
/// replacement characters for deconjugating into base forms.
class AllomorphPattern {
  // Context patterns required adjacent to the affix; the default single
  // empty string means "no context required".
  final List<Pattern> lookAheadBehind;

  // Affix text -> replacement candidates. A null value means "strip the
  // affix without substituting anything".
  final Map<String, List<String>?> patterns;

  // Whether the affix attaches at the start (prefix) or end (suffix).
  final LemmatizationRuleType type;

  const AllomorphPattern({
    required this.patterns,
    required this.type,
    this.lookAheadBehind = const [''],
  });

  // Cross product of every affix with every context pattern, each combined
  // into a single matchable Pattern. For string contexts the result is a
  // plain concatenated string; for RegExp contexts a RegExp whose group 1/2
  // capture the affix (prefix) or context+affix (suffix).
  // NOTE(review): affix text is interpolated into the RegExp unescaped —
  // affixes containing regex metacharacters would misbehave; current rule
  // data appears to use kana only, but confirm before adding other affixes.
  List<(String, Pattern)> get allPatternCombinations {
    final combinations = <(String, Pattern)>[];
    for (final l in lookAheadBehind) {
      for (final p in patterns.keys) {
        switch ((type, l is RegExp)) {
          case (LemmatizationRuleType.prefix, true):
            combinations.add((p, RegExp('^($p)(${(l as RegExp).pattern})')));
            break;
          case (LemmatizationRuleType.prefix, false):
            combinations.add((p, '$p$l'));
            break;
          case (LemmatizationRuleType.suffix, true):
            combinations.add((p, RegExp('(${(l as RegExp).pattern})($p)\$')));
            break;
          case (LemmatizationRuleType.suffix, false):
            combinations.add((p, '$l$p'));
            break;
        }
      }
    }
    return combinations;
  }

  /// Whether [word] matches any affix/context combination of this pattern.
  bool matches(String word) {
    for (final (_, p) in allPatternCombinations) {
      if (p is String) {
        // Plain-string contexts: match by position according to [type].
        if (type == LemmatizationRuleType.prefix
            ? word.startsWith(p)
            : word.endsWith(p)) {
          return true;
        }
      } else if (p is RegExp) {
        if (p.hasMatch(word)) {
          return true;
        }
      }
    }
    return false;
  }

  /// Applies the first matching combination to [word] and returns the
  /// candidate rewrites (affix replaced by each replacement, or simply
  /// stripped when the affix maps to null). Returns `null` when nothing
  /// matches. Context text from [lookAheadBehind] is preserved; only the
  /// affix itself is replaced.
  List<String>? apply(String word) {
    for (final (affix, p) in allPatternCombinations) {
      switch ((type, p is RegExp)) {
        case (LemmatizationRuleType.prefix, true):
          final match = (p as RegExp).firstMatch(word);
          if (match != null) {
            // Group 1 is the affix by construction (see
            // allPatternCombinations).
            final prefix = match.group(1)!;
            assert(prefix == affix);
            final suffix = word.substring(prefix.length);
            return patterns[prefix] != null
                ? patterns[prefix]!.map((s) => s + suffix).toList()
                : [suffix];
          }
          break;
        case (LemmatizationRuleType.prefix, false):
          if (word.startsWith(p as String)) {
            return patterns[affix] != null
                ? patterns[affix]!
                    .map((s) => s + word.substring(affix.length))
                    .toList()
                : [word.substring(affix.length)];
          }
          break;
        case (LemmatizationRuleType.suffix, true):
          final match = (p as RegExp).firstMatch(word);
          if (match != null) {
            // Group 2 is the affix by construction.
            final suffix = match.group(2)!;
            assert(suffix == affix);
            final prefix = word.substring(0, word.length - suffix.length);
            return patterns[suffix] != null
                ? patterns[suffix]!.map((s) => prefix + s).toList()
                : [prefix];
          }
          break;
        case (LemmatizationRuleType.suffix, false):
          if (word.endsWith(p as String)) {
            // Only the affix is removed; any string context remains part of
            // the kept prefix.
            final prefix = word.substring(0, word.length - affix.length);
            return patterns[affix] != null
                ? patterns[affix]!.map((s) => prefix + s).toList()
                : [prefix];
          }
          break;
      }
    }
    return null;
  }
}
|
||||
|
||||
/// One node in a lemmatization tree: [rule] applied to [original], selecting
/// output [variant], with further deconjugation steps in [children].
class Lemmatized {
  /// The word form this node was derived from.
  final String original;

  /// The rule applied at this step.
  final LemmatizationRule rule;

  /// Index into the list returned by [rule]'s `apply` (rules with several
  /// allomorph replacements produce several candidate base forms).
  final int variant;

  /// Further deconjugations of this node's [applied] form.
  final List<Lemmatized> children;

  const Lemmatized({
    required this.original,
    required this.rule,
    this.variant = 0,
    this.children = const [],
  });

  /// The rewritten form produced by [rule] for this [variant].
  ///
  /// Returns `null` when the rule does not apply, or when [variant] is out
  /// of range for the rule's output (the original implementation threw a
  /// RangeError in that case).
  String? get applied {
    final forms = rule.apply(original);
    if (forms == null || forms.isEmpty) {
      return null;
    }
    if (variant < 0 || variant >= forms.length) {
      return null;
    }
    return forms[variant];
  }

  @override
  String toString() {
    final childrenString = children
        .map((c) => ' - ' + c.toString().split('\n').join('\n  '))
        .join('\n');

    if (children.isEmpty) {
      return '$original (${rule.name}) -> ${applied ?? '<null>'}';
    } else {
      return '$original (${rule.name}) -> ${applied ?? '<null>'}\n$childrenString';
    }
  }
}
|
||||
|
||||
/// Recursively applies every applicable rule to [word], honouring
/// [parentRule]'s child-class restrictions and terminality.
List<Lemmatized> _lemmatize(LemmatizationRule parentRule, String word) {
  // Terminal rules (base forms) end the deconjugation chain.
  if (parentRule.terminal) {
    return [];
  }

  // Restrict candidate rules to the parent's allowed child classes, if any.
  final allowedClasses = parentRule.validChildClasses;
  final candidateRules = allowedClasses == null
      ? lemmatizationRules
      : lemmatizationRules.where(
          (r) => allowedClasses.contains(r.wordClass),
        );

  final results = <Lemmatized>[];
  for (final rule in candidateRules) {
    if (!rule.matches(word)) {
      continue;
    }
    // Each candidate rewrite becomes its own tree node, recursing further.
    for (final (index, candidate) in (rule.apply(word) ?? []).indexed) {
      results.add(
        Lemmatized(
          original: word,
          rule: rule,
          variant: index,
          children: _lemmatize(rule, candidate),
        ),
      );
    }
  }
  return results;
}
|
||||
|
||||
/// Builds the full lemmatization tree for [word].
///
/// The root wraps [word] in a synthetic "Input" rule whose empty suffix
/// pattern matches anything; children are produced by recursively applying
/// the rule set.
Lemmatized lemmatize(String word) {
  final inputRule = LemmatizationRule.simple(
    name: 'Input',
    pattern: '',
    replacement: null,
    wordClass: WordClass.input,
  );

  final children = _lemmatize(inputRule, word);
  return Lemmatized(
    original: word,
    rule: inputRule,
    children: children,
  );
}
|
||||
10
lib/util/lemmatizer/rules.dart
Normal file
10
lib/util/lemmatizer/rules.dart
Normal file
@@ -0,0 +1,10 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/godan-verbs.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/i-adjectives.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/ichidan-verbs.dart';
|
||||
|
||||
/// All deconjugation rules known to the lemmatizer.
///
/// Declared `final` so the top-level list reference cannot be reassigned at
/// runtime (the original declaration was an implicitly mutable global).
final List<LemmatizationRule> lemmatizationRules = [
  ...ichidanVerbLemmatizationRules,
  ...godanVerbLemmatizationRules,
  ...iAdjectiveLemmatizationRules,
];
|
||||
457
lib/util/lemmatizer/rules/godan-verbs.dart
Normal file
457
lib/util/lemmatizer/rules/godan-verbs.dart
Normal file
@@ -0,0 +1,457 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
|
||||
List<LemmatizationRule> godanVerbLemmatizationRules = [
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - base form',
|
||||
terminal: true,
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'う': ['う'],
|
||||
'く': ['く'],
|
||||
'ぐ': ['ぐ'],
|
||||
'す': ['す'],
|
||||
'つ': ['つ'],
|
||||
'ぬ': ['ぬ'],
|
||||
'ぶ': ['ぶ'],
|
||||
'む': ['む'],
|
||||
'る': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わない': ['う'],
|
||||
'かない': ['く'],
|
||||
'がない': ['ぐ'],
|
||||
'さない': ['す'],
|
||||
'たない': ['つ'],
|
||||
'なない': ['ぬ'],
|
||||
'ばない': ['ぶ'],
|
||||
'まない': ['む'],
|
||||
'らない': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - past form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'した': ['す'],
|
||||
'った': ['る', 'つ', 'う'],
|
||||
'んだ': ['む', 'ぬ', 'ぶ'],
|
||||
'いだ': ['ぐ'],
|
||||
'いた': ['く'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - te-form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いて': ['く', 'ぐ'],
|
||||
'して': ['す'],
|
||||
'って': ['る', 'つ', 'う'],
|
||||
'んで': ['む', 'ぬ', 'ぶ'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - te-form with いる',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いている': ['く', 'ぐ'],
|
||||
'している': ['す'],
|
||||
'っている': ['る', 'つ', 'う'],
|
||||
'んでいる': ['む', 'ぬ', 'ぶ'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - te-form with いた',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いていた': ['く', 'ぐ'],
|
||||
'していた': ['す'],
|
||||
'っていた': ['る', 'つ', 'う'],
|
||||
'んでいた': ['む', 'ぬ', 'ぶ'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - conditional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'けば': ['く'],
|
||||
'げば': ['ぐ'],
|
||||
'せば': ['す'],
|
||||
'てば': ['つ', 'る', 'う'],
|
||||
'ねば': ['ぬ'],
|
||||
'べば': ['ぶ'],
|
||||
'めば': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - volitional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'おう': ['う'],
|
||||
'こう': ['く'],
|
||||
'ごう': ['ぐ'],
|
||||
'そう': ['す'],
|
||||
'とう': ['つ', 'る', 'う'],
|
||||
'のう': ['ぬ'],
|
||||
'ぼう': ['ぶ'],
|
||||
'もう': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - potential form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'ける': ['く'],
|
||||
'げる': ['ぐ'],
|
||||
'せる': ['す'],
|
||||
'てる': ['つ', 'る', 'う'],
|
||||
'ねる': ['ぬ'],
|
||||
'べる': ['ぶ'],
|
||||
'める': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かれる': ['く'],
|
||||
'がれる': ['ぐ'],
|
||||
'される': ['す'],
|
||||
'たれる': ['つ', 'る', 'う'],
|
||||
'なれる': ['ぬ'],
|
||||
'ばれる': ['ぶ'],
|
||||
'まれる': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - causative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かせる': ['く'],
|
||||
'がせる': ['ぐ'],
|
||||
'させる': ['す'],
|
||||
'たせる': ['つ', 'る', 'う'],
|
||||
'なせる': ['ぬ'],
|
||||
'ばせる': ['ぶ'],
|
||||
'ませる': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - causative-passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かされる': ['く'],
|
||||
'がされる': ['ぐ'],
|
||||
'される': ['す'],
|
||||
'たされる': ['つ', 'る', 'う'],
|
||||
'なされる': ['ぬ'],
|
||||
'ばされる': ['ぶ'],
|
||||
'まされる': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - imperative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'え': ['う'],
|
||||
'け': ['く'],
|
||||
'げ': ['ぐ'],
|
||||
'せ': ['す'],
|
||||
'て': ['つ', 'る', 'う'],
|
||||
'ね': ['ぬ'],
|
||||
'べ': ['ぶ'],
|
||||
'め': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative past form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わなかった': ['う'],
|
||||
'かなかった': ['く'],
|
||||
'がなかった': ['ぐ'],
|
||||
'さなかった': ['す'],
|
||||
'たなかった': ['つ'],
|
||||
'ななかった': ['ぬ'],
|
||||
'ばなかった': ['ぶ'],
|
||||
'まなかった': ['む'],
|
||||
'らなかった': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative te-form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わなくて': ['う'],
|
||||
'かなくて': ['く'],
|
||||
'がなくて': ['ぐ'],
|
||||
'さなくて': ['す'],
|
||||
'たなくて': ['つ'],
|
||||
'ななくて': ['ぬ'],
|
||||
'ばなくて': ['ぶ'],
|
||||
'まなくて': ['む'],
|
||||
'らなくて': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative conditional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わなければ': ['う'],
|
||||
'かなければ': ['く'],
|
||||
'がなければ': ['ぐ'],
|
||||
'さなければ': ['す'],
|
||||
'たなければ': ['つ'],
|
||||
'ななければ': ['ぬ'],
|
||||
'ばなければ': ['ぶ'],
|
||||
'まなければ': ['む'],
|
||||
'らなければ': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative volitional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'うまい': ['う'],
|
||||
'くまい': ['く'],
|
||||
'ぐまい': ['ぐ'],
|
||||
'すまい': ['す'],
|
||||
'つまい': ['つ', 'る', 'う'],
|
||||
'ぬまい': ['ぬ'],
|
||||
'ぶまい': ['ぶ'],
|
||||
'むまい': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative potential form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'けない': ['く'],
|
||||
'げない': ['ぐ'],
|
||||
'せない': ['す'],
|
||||
'てない': ['つ', 'る', 'う'],
|
||||
'ねない': ['ぬ'],
|
||||
'べない': ['ぶ'],
|
||||
'めない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かれない': ['く'],
|
||||
'がれない': ['ぐ'],
|
||||
'されない': ['す'],
|
||||
'たれない': ['つ', 'る', 'う'],
|
||||
'なれない': ['ぬ'],
|
||||
'ばれない': ['ぶ'],
|
||||
'まれない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative causative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かせない': ['く'],
|
||||
'がせない': ['ぐ'],
|
||||
'させない': ['す'],
|
||||
'たせない': ['つ', 'る', 'う'],
|
||||
'なせない': ['ぬ'],
|
||||
'ばせない': ['ぶ'],
|
||||
'ませない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative causative-passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かされない': ['く'],
|
||||
'がされない': ['ぐ'],
|
||||
'されない': ['す'],
|
||||
'たされない': ['つ', 'る', 'う'],
|
||||
'なされない': ['ぬ'],
|
||||
'ばされない': ['ぶ'],
|
||||
'まされない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative imperative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'うな': ['う'],
|
||||
'くな': ['く'],
|
||||
'ぐな': ['ぐ'],
|
||||
'すな': ['す'],
|
||||
'つな': ['つ'],
|
||||
'ぬな': ['ぬ'],
|
||||
'ぶな': ['ぶ'],
|
||||
'むな': ['む'],
|
||||
'るな': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'きたい': ['く'],
|
||||
'ぎたい': ['ぐ'],
|
||||
'したい': ['す'],
|
||||
'ちたい': ['つ'],
|
||||
'にたい': ['ぬ'],
|
||||
'びたい': ['ぶ'],
|
||||
'みたい': ['む'],
|
||||
'りたい': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いたくない': ['う'],
|
||||
'きたくない': ['く'],
|
||||
'ぎたくない': ['ぐ'],
|
||||
'したくない': ['す'],
|
||||
'ちたくない': ['つ'],
|
||||
'にたくない': ['ぬ'],
|
||||
'びたくない': ['ぶ'],
|
||||
'みたくない': ['む'],
|
||||
'りたくない': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - past desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'きたかった': ['く'],
|
||||
'ぎたかった': ['ぐ'],
|
||||
'したかった': ['す'],
|
||||
'ちたかった': ['つ'],
|
||||
'にたかった': ['ぬ'],
|
||||
'びたかった': ['ぶ'],
|
||||
'みたかった': ['む'],
|
||||
'りたかった': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative past desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いたくなかった': ['う'],
|
||||
'きたくなかった': ['く'],
|
||||
'ぎたくなかった': ['ぐ'],
|
||||
'したくなかった': ['す'],
|
||||
'ちたくなかった': ['つ'],
|
||||
'にたくなかった': ['ぬ'],
|
||||
'びたくなかった': ['ぶ'],
|
||||
'みたくなかった': ['む'],
|
||||
'りたくなかった': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
];
|
||||
61
lib/util/lemmatizer/rules/i-adjectives.dart
Normal file
61
lib/util/lemmatizer/rules/i-adjectives.dart
Normal file
@@ -0,0 +1,61 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
|
||||
List<LemmatizationRule> iAdjectiveLemmatizationRules = [
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - base form',
|
||||
terminal: true,
|
||||
pattern: 'い',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - negative form',
|
||||
pattern: 'くない',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - past form',
|
||||
pattern: 'かった',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - negative past form',
|
||||
pattern: 'くなかった',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - te-form',
|
||||
pattern: 'くて',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - conditional form',
|
||||
pattern: 'ければ',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - volitional form',
|
||||
pattern: 'かろう',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'I adjective - continuative form',
|
||||
pattern: 'く',
|
||||
replacement: 'い',
|
||||
validChildClasses: [WordClass.iAdjective],
|
||||
wordClass: WordClass.iAdjective,
|
||||
),
|
||||
];
|
||||
241
lib/util/lemmatizer/rules/ichidan-verbs.dart
Normal file
241
lib/util/lemmatizer/rules/ichidan-verbs.dart
Normal file
@@ -0,0 +1,241 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
import 'package:jadb/util/text_filtering.dart';
|
||||
|
||||
List<Pattern> lookBehinds = [
|
||||
kanjiRegex,
|
||||
'き',
|
||||
'ぎ',
|
||||
'し',
|
||||
'じ',
|
||||
'ち',
|
||||
'ぢ',
|
||||
'に',
|
||||
'ひ',
|
||||
'び',
|
||||
'び',
|
||||
'み',
|
||||
'り',
|
||||
'け',
|
||||
'げ',
|
||||
'せ',
|
||||
'ぜ',
|
||||
'て',
|
||||
'で',
|
||||
'ね',
|
||||
'へ',
|
||||
'べ',
|
||||
'め',
|
||||
'れ',
|
||||
];
|
||||
|
||||
List<LemmatizationRule> ichidanVerbLemmatizationRules = [
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - base form',
|
||||
terminal: true,
|
||||
pattern: 'る',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative form',
|
||||
pattern: 'ない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - past form',
|
||||
pattern: 'た',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - te-form',
|
||||
pattern: 'て',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - te-form with いる',
|
||||
pattern: 'ている',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - te-form with いた',
|
||||
pattern: 'ていた',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - conditional form',
|
||||
pattern: 'れば',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - volitional form',
|
||||
pattern: 'よう',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - potential form',
|
||||
pattern: 'られる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - passive form',
|
||||
pattern: 'られる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - causative form',
|
||||
pattern: 'させる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - causative passive form',
|
||||
pattern: 'させられる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - imperative form',
|
||||
pattern: 'れ',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative past form',
|
||||
pattern: 'なかった',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative te-form',
|
||||
pattern: 'なくて',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative conditional form',
|
||||
pattern: 'なければ',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative volitional form',
|
||||
pattern: 'なかろう',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative potential form',
|
||||
pattern: 'られない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative passive form',
|
||||
pattern: 'られない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative causative form',
|
||||
pattern: 'させない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative causative passive form',
|
||||
pattern: 'させられない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative imperative form',
|
||||
pattern: 'るな',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - desire form',
|
||||
pattern: 'たい',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative desire form',
|
||||
pattern: 'たくない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - past desire form',
|
||||
pattern: 'たかった',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative past desire form',
|
||||
pattern: 'たくなかった',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: lookBehinds,
|
||||
validChildClasses: [WordClass.ichidanVerb],
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
),
|
||||
];
|
||||
@@ -4,7 +4,7 @@
|
||||
/// See https://www.regular-expressions.info/unicode.html
|
||||
///
|
||||
/// Remember to turn on the unicode flag when making a new RegExp.
|
||||
const String rawKanjiRegex = r'\p{Script=Hani}';
|
||||
const String rawCJKRegex = r'\p{Script=Hani}';
|
||||
|
||||
/// The string version of a regex that will match any katakana.
|
||||
/// This includes the ranges (), ()
|
||||
@@ -22,7 +22,24 @@ const String rawKatakanaRegex = r'\p{Script=Katakana}';
|
||||
/// Remember to turn on the unicode flag when making a new RegExp.
|
||||
const String rawHiraganaRegex = r'\p{Script=Hiragana}';
|
||||
|
||||
/// The string version of a regex that will match any kanji.
|
||||
/// This includes the ranges (), ()
|
||||
///
|
||||
/// See https://www.regular-expressions.info/unicode.html
|
||||
///
|
||||
/// Remember to turn on the unicode flag when making a new RegExp.
|
||||
const String rawKanjiRegex = r'[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]';
|
||||
|
||||
final RegExp kanjiRegex = RegExp(rawKanjiRegex, unicode: true);
|
||||
final RegExp cjkRegex = RegExp(rawCJKRegex, unicode: true);
|
||||
final RegExp katakanaRegex = RegExp(rawKatakanaRegex, unicode: true);
|
||||
final RegExp hiraganaRegex = RegExp(rawHiraganaRegex, unicode: true);
|
||||
final RegExp kanjiRegex = RegExp(rawKanjiRegex, unicode: true);
|
||||
|
||||
List<String> filterKanjiSuggestions(String string) {
|
||||
return kanjiRegex
|
||||
.allMatches(string)
|
||||
.map((match) => match.group(0))
|
||||
.where((element) => element != null)
|
||||
.map((element) => element!)
|
||||
.toList();
|
||||
}
|
||||
|
||||
@@ -33,39 +33,40 @@ CREATE TABLE "JMdict_InfoReading" (
|
||||
-- not implement a check for it.
|
||||
|
||||
CREATE TABLE "JMdict_Entry" (
|
||||
"id" INTEGER PRIMARY KEY
|
||||
"entryId" INTEGER PRIMARY KEY
|
||||
);
|
||||
|
||||
-- KanjiElement
|
||||
|
||||
CREATE TABLE "JMdict_KanjiElement" (
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
||||
"orderNum" INTEGER,
|
||||
"elementId" INTEGER PRIMARY KEY,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
"reading" TEXT NOT NULL,
|
||||
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
||||
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
|
||||
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
||||
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
||||
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
||||
PRIMARY KEY ("entryId", "reading")
|
||||
UNIQUE("entryId", "reading"),
|
||||
UNIQUE("entryId", "orderNum")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum");
|
||||
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
|
||||
|
||||
CREATE TABLE "JMdict_KanjiElementInfo" (
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"reading" TEXT NOT NULL,
|
||||
"elementId" INTEGER NOT NULL REFERENCES "JMdict_KanjiElement"("elementId"),
|
||||
"info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
|
||||
FOREIGN KEY ("entryId", "reading")
|
||||
REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
PRIMARY KEY ("entryId", "reading", "info")
|
||||
PRIMARY KEY ("elementId", "info")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
-- ReadingElement
|
||||
|
||||
CREATE TABLE "JMdict_ReadingElement" (
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
||||
"orderNum" INTEGER,
|
||||
"elementId" INTEGER PRIMARY KEY,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
"reading" TEXT NOT NULL,
|
||||
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
|
||||
@@ -73,55 +74,51 @@ CREATE TABLE "JMdict_ReadingElement" (
|
||||
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
|
||||
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
|
||||
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
|
||||
PRIMARY KEY ("entryId", "reading")
|
||||
UNIQUE("entryId", "reading"),
|
||||
UNIQUE("entryId", "orderNum")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum");
|
||||
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
|
||||
|
||||
CREATE TABLE "JMdict_ReadingElementRestriction" (
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"reading" TEXT NOT NULL,
|
||||
"elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"),
|
||||
"restriction" TEXT NOT NULL,
|
||||
FOREIGN KEY ("entryId", "reading")
|
||||
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
PRIMARY KEY ("entryId", "reading", "restriction")
|
||||
PRIMARY KEY ("elementId", "restriction")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TABLE "JMdict_ReadingElementInfo" (
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"reading" TEXT NOT NULL,
|
||||
"elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"),
|
||||
"info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
|
||||
FOREIGN KEY ("entryId", "reading")
|
||||
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
PRIMARY KEY ("entryId", "reading", "info")
|
||||
PRIMARY KEY ("elementId", "info")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
-- Sense
|
||||
|
||||
CREATE TABLE "JMdict_Sense" (
|
||||
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
"entryId" INTEGER REFERENCES "JMdict_Entry"("id"),
|
||||
"orderNum" INTEGER,
|
||||
"senseId" INTEGER PRIMARY KEY,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
UNIQUE("entryId", "orderNum")
|
||||
);
|
||||
|
||||
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
|
||||
|
||||
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
|
||||
"entryId" INTEGER,
|
||||
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
||||
"kanji" TEXT,
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"kanji" TEXT NOT NULL,
|
||||
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
PRIMARY KEY ("entryId", "senseId", "kanji")
|
||||
);
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TABLE "JMdict_SenseRestrictedToReading" (
|
||||
"entryId" INTEGER,
|
||||
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
||||
"reading" TEXT,
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"reading" TEXT NOT NULL,
|
||||
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
PRIMARY KEY ("entryId", "senseId", "reading")
|
||||
);
|
||||
) WITHOUT ROWID;
|
||||
|
||||
-- In order to add xrefs, you will need to have added the entry to xref to.
|
||||
-- These should be added in a second pass of the dictionary file.
|
||||
@@ -134,37 +131,33 @@ CREATE TABLE "JMdict_SenseRestrictedToReading" (
|
||||
-- These two things also concern "SenseAntonym"
|
||||
|
||||
CREATE TABLE "JMdict_SenseSeeAlso" (
|
||||
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
||||
"xrefEntryId" INTEGER,
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"xrefEntryId" INTEGER NOT NULL,
|
||||
"seeAlsoReading" TEXT,
|
||||
"seeAlsoKanji" TEXT,
|
||||
"seeAlsoSense" INTEGER,
|
||||
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
||||
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
||||
"ambiguous" BOOLEAN,
|
||||
CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL),
|
||||
-- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
|
||||
-- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
|
||||
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
|
||||
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
|
||||
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
|
||||
);
|
||||
|
||||
CREATE TABLE "JMdict_SenseAntonym" (
|
||||
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
||||
"xrefEntryId" INTEGER,
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"xrefEntryId" INTEGER NOT NULL,
|
||||
"antonymReading" TEXT,
|
||||
"antonymKanji" TEXT,
|
||||
"antonymSense" INTEGER,
|
||||
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
|
||||
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
|
||||
"ambiguous" BOOLEAN,
|
||||
CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL),
|
||||
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
|
||||
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
|
||||
PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
|
||||
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
|
||||
);
|
||||
|
||||
-- These cross references are going to be mostly accessed from a sense
|
||||
@@ -173,7 +166,7 @@ CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId")
|
||||
CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
|
||||
|
||||
CREATE TABLE "JMdict_SensePOS" (
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
|
||||
PRIMARY KEY ("senseId", "pos")
|
||||
) WITHOUT ROWID;
|
||||
@@ -181,28 +174,28 @@ CREATE TABLE "JMdict_SensePOS" (
|
||||
CREATE TABLE "JMdict_SenseField" (
|
||||
"senseId" INTEGER NOT NULL,
|
||||
"field" TEXT NOT NULL,
|
||||
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
|
||||
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("senseId"),
|
||||
FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
|
||||
PRIMARY KEY ("senseId", "field")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TABLE "JMdict_SenseMisc" (
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
|
||||
PRIMARY KEY ("senseId", "misc")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TABLE "JMdict_SenseLanguageSource" (
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"language" CHAR(3) NOT NULL DEFAULT "eng",
|
||||
"phrase" TEXT,
|
||||
"fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
"constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
PRIMARY KEY ("senseId", "language", "phrase")
|
||||
UNIQUE("senseId", "language", "phrase")
|
||||
);
|
||||
|
||||
CREATE TABLE "JMdict_SenseDialect" (
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
|
||||
PRIMARY KEY ("senseId", "dialect")
|
||||
) WITHOUT ROWID;
|
||||
@@ -213,7 +206,7 @@ CREATE TABLE "JMdict_SenseDialect" (
|
||||
-- will be omitted.
|
||||
|
||||
CREATE TABLE "JMdict_SenseGlossary" (
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"phrase" TEXT NOT NULL,
|
||||
"language" CHAR(3) NOT NULL DEFAULT "eng",
|
||||
"type" TEXT,
|
||||
@@ -223,7 +216,7 @@ CREATE TABLE "JMdict_SenseGlossary" (
|
||||
CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
|
||||
|
||||
CREATE TABLE "JMdict_SenseInfo" (
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"info" TEXT NOT NULL,
|
||||
PRIMARY KEY ("senseId", "info")
|
||||
) WITHOUT ROWID;
|
||||
@@ -232,8 +225,7 @@ CREATE TABLE "JMdict_SenseInfo" (
|
||||
-- the Tanaka Corpus, so I will leave the type out for now.
|
||||
|
||||
CREATE TABLE "JMdict_ExampleSentence" (
|
||||
"id" INTEGER PRIMARY KEY,
|
||||
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
|
||||
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
|
||||
"word" TEXT NOT NULL,
|
||||
"source" TEXT NOT NULL,
|
||||
"sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng",
|
||||
|
||||
55
migrations/0003_JMDict_FTS5_Ttables.sql
Normal file
55
migrations/0003_JMDict_FTS5_Ttables.sql
Normal file
@@ -0,0 +1,55 @@
|
||||
CREATE VIRTUAL TABLE "JMdict_KanjiElementFTS" USING FTS5("elementId" UNINDEXED, "reading");
|
||||
|
||||
CREATE TRIGGER "JMdict_KanjiElement_InsertFTS"
|
||||
AFTER INSERT ON "JMdict_KanjiElement"
|
||||
BEGIN
|
||||
INSERT INTO "JMdict_KanjiElementFTS"("elementId", "reading")
|
||||
VALUES (NEW."elementId", NEW."reading");
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_KanjiElement_UpdateFTS"
|
||||
AFTER UPDATE OF "elementId", "reading"
|
||||
ON "JMdict_KanjiElement"
|
||||
BEGIN
|
||||
UPDATE "JMdict_KanjiElementFTS"
|
||||
SET
|
||||
"elementId" = NEW."elementId",
|
||||
"reading" = NEW."reading"
|
||||
WHERE "elementId" = OLD."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_KanjiElement_DeleteFTS"
|
||||
AFTER DELETE ON "JMdict_KanjiElement"
|
||||
BEGIN
|
||||
DELETE FROM "JMdict_KanjiElementFTS"
|
||||
WHERE "elementId" = OLD."elementId";
|
||||
END;
|
||||
|
||||
|
||||
|
||||
CREATE VIRTUAL TABLE "JMdict_ReadingElementFTS" USING FTS5("elementId" UNINDEXED, "reading");
|
||||
|
||||
CREATE TRIGGER "JMdict_ReadingElement_InsertFTS"
|
||||
AFTER INSERT ON "JMdict_ReadingElement"
|
||||
BEGIN
|
||||
INSERT INTO "JMdict_ReadingElementFTS"("elementId", "reading")
|
||||
VALUES (NEW."elementId", NEW."reading");
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_ReadingElement_UpdateFTS"
|
||||
AFTER UPDATE OF "elementId", "reading"
|
||||
ON "JMdict_ReadingElement"
|
||||
BEGIN
|
||||
UPDATE "JMdict_ReadingElementFTS"
|
||||
SET
|
||||
"elementId" = NEW."elementId",
|
||||
"reading" = NEW."reading"
|
||||
WHERE "elementId" = OLD."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_ReadingElement_DeleteFTS"
|
||||
AFTER DELETE ON "JMdict_ReadingElement"
|
||||
BEGIN
|
||||
DELETE FROM "JMdict_ReadingElementFTS"
|
||||
WHERE "elementId" = OLD."elementId";
|
||||
END;
|
||||
@@ -2,7 +2,7 @@ CREATE TABLE "JMdict_JLPTTag" (
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"jlptLevel" CHAR(2) NOT NULL CHECK ("jlptLevel" in ('N5', 'N4', 'N3', 'N2', 'N1')),
|
||||
FOREIGN KEY ("entryId")
|
||||
REFERENCES "JMdict_Entry"("id"),
|
||||
REFERENCES "JMdict_Entry"("entryId"),
|
||||
PRIMARY KEY ("entryId", "jlptLevel")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
207
migrations/0005_JMDict_search_index_tables.sql
Normal file
207
migrations/0005_JMDict_search_index_tables.sql
Normal file
@@ -0,0 +1,207 @@
|
||||
CREATE TABLE "JMdict_EntryScore" (
|
||||
"type" CHAR(1) NOT NULL CHECK ("type" IN ('r', 'k')),
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
|
||||
"elementId" INTEGER NOT NULL,
|
||||
"score" INTEGER NOT NULL DEFAULT 0,
|
||||
"common" BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
PRIMARY KEY ("type", "elementId")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score");
|
||||
CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score");
|
||||
CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common");
|
||||
|
||||
CREATE INDEX "JMdict_EntryScore_byType_byElementId_byScore" ON "JMdict_EntryScore"("type", "elementId", "score");
|
||||
CREATE INDEX "JMdict_EntryScore_byType_byScore" ON "JMdict_EntryScore"("type", "score");
|
||||
CREATE INDEX "JMdict_EntryScore_byType_byCommon" ON "JMdict_EntryScore"("type", "common");
|
||||
|
||||
-- NOTE: these views are deduplicated in order not to perform an unnecessary
|
||||
-- UNION on every trigger
|
||||
|
||||
CREATE VIEW "JMdict_EntryScoreView_Reading" AS
|
||||
SELECT
|
||||
'r' AS "type",
|
||||
"JMdict_ReadingElement"."entryId",
|
||||
"JMdict_ReadingElement"."elementId",
|
||||
(
|
||||
"news" IS 1
|
||||
OR "ichi" IS 1
|
||||
OR "spec" IS 1
|
||||
OR "gai" IS 1
|
||||
)
|
||||
AS "common",
|
||||
((
|
||||
"news" IS 1
|
||||
OR "ichi" IS 1
|
||||
OR "spec" IS 1
|
||||
OR "gai" IS 1
|
||||
) * 50)
|
||||
+ (("news" IS 1) * 10)
|
||||
+ (("news" IS 2) * 5)
|
||||
+ (("ichi" IS 1) * 10)
|
||||
+ (("ichi" IS 2) * 5)
|
||||
+ (("spec" IS 1) * 10)
|
||||
+ (("spec" IS 2) * 5)
|
||||
+ (("gai" IS 1) * 10)
|
||||
+ (("gai" IS 2) * 5)
|
||||
+ (("orderNum" IS 1) * 20)
|
||||
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
||||
AS "score"
|
||||
FROM "JMdict_ReadingElement"
|
||||
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
|
||||
|
||||
CREATE VIEW "JMdict_EntryScoreView_Kanji" AS
|
||||
SELECT
|
||||
'k' AS "type",
|
||||
"JMdict_KanjiElement"."entryId",
|
||||
"JMdict_KanjiElement"."elementId",
|
||||
(
|
||||
"news" IS 1
|
||||
OR "ichi" IS 1
|
||||
OR "spec" IS 1
|
||||
OR "gai" IS 1
|
||||
)
|
||||
AS "common",
|
||||
((
|
||||
"news" IS 1
|
||||
OR "ichi" IS 1
|
||||
OR "spec" IS 1
|
||||
OR "gai" IS 1
|
||||
) * 50)
|
||||
+ (("news" IS 1) * 10)
|
||||
+ (("news" IS 2) * 5)
|
||||
+ (("ichi" IS 1) * 10)
|
||||
+ (("ichi" IS 2) * 5)
|
||||
+ (("spec" IS 1) * 10)
|
||||
+ (("spec" IS 2) * 5)
|
||||
+ (("gai" IS 1) * 10)
|
||||
+ (("gai" IS 2) * 5)
|
||||
+ (("orderNum" IS 1) * 20)
|
||||
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
|
||||
AS "score"
|
||||
FROM "JMdict_KanjiElement"
|
||||
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
|
||||
|
||||
CREATE VIEW "JMdict_EntryScoreView" AS
|
||||
SELECT *
|
||||
FROM "JMdict_EntryScoreView_Kanji"
|
||||
UNION ALL
|
||||
SELECT *
|
||||
FROM "JMdict_EntryScoreView_Reading";
|
||||
|
||||
|
||||
--- JMdict_ReadingElement triggers
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_ReadingElement"
|
||||
AFTER INSERT ON "JMdict_ReadingElement"
|
||||
BEGIN
|
||||
INSERT INTO "JMdict_EntryScore" (
|
||||
"type",
|
||||
"entryId",
|
||||
"elementId",
|
||||
"score",
|
||||
"common"
|
||||
)
|
||||
SELECT "type", "entryId", "elementId", "score", "common"
|
||||
FROM "JMdict_EntryScoreView_Reading"
|
||||
WHERE "elementId" = NEW."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
|
||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
|
||||
ON "JMdict_ReadingElement"
|
||||
BEGIN
|
||||
UPDATE "JMdict_EntryScore"
|
||||
SET
|
||||
"score" = "JMdict_EntryScoreView_Reading"."score",
|
||||
"common" = "JMdict_EntryScoreView_Reading"."common"
|
||||
FROM "JMdict_EntryScoreView_Reading"
|
||||
WHERE "elementId" = NEW."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement"
|
||||
AFTER DELETE ON "JMdict_ReadingElement"
|
||||
BEGIN
|
||||
DELETE FROM "JMdict_EntryScore"
|
||||
WHERE "type" = 'r'
|
||||
AND "elementId" = OLD."elementId";
|
||||
END;
|
||||
|
||||
--- JMdict_KanjiElement triggers
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_KanjiElement"
|
||||
AFTER INSERT ON "JMdict_KanjiElement"
|
||||
BEGIN
|
||||
INSERT INTO "JMdict_EntryScore" (
|
||||
"type",
|
||||
"entryId",
|
||||
"elementId",
|
||||
"score",
|
||||
"common"
|
||||
)
|
||||
SELECT "type", "entryId", "elementId", "score", "common"
|
||||
FROM "JMdict_EntryScoreView_Kanji"
|
||||
WHERE "elementId" = NEW."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
|
||||
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
|
||||
ON "JMdict_KanjiElement"
|
||||
BEGIN
|
||||
UPDATE "JMdict_EntryScore"
|
||||
SET
|
||||
"score" = "JMdict_EntryScoreView_Kanji"."score",
|
||||
"common" = "JMdict_EntryScoreView_Kanji"."common"
|
||||
FROM "JMdict_EntryScoreView_Kanji"
|
||||
WHERE "elementId" = NEW."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement"
|
||||
AFTER DELETE ON "JMdict_KanjiElement"
|
||||
BEGIN
|
||||
DELETE FROM "JMdict_EntryScore"
|
||||
WHERE "type" = 'k'
|
||||
AND "elementId" = OLD."elementId";
|
||||
END;
|
||||
|
||||
--- JMdict_JLPTTag triggers
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_JLPTTag"
|
||||
AFTER INSERT ON "JMdict_JLPTTag"
|
||||
BEGIN
|
||||
UPDATE "JMdict_EntryScore"
|
||||
SET
|
||||
"score" = "JMdict_EntryScoreView"."score",
|
||||
"common" = "JMdict_EntryScoreView"."common"
|
||||
FROM "JMdict_EntryScoreView"
|
||||
WHERE "JMdict_EntryScoreView"."entryId" = NEW."entryId"
|
||||
AND "JMdict_EntryScore"."entryId" = NEW."entryId"
|
||||
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_JLPTTag"
|
||||
AFTER UPDATE OF "jlptLevel"
|
||||
ON "JMdict_JLPTTag"
|
||||
BEGIN
|
||||
UPDATE "JMdict_EntryScore"
|
||||
SET
|
||||
"score" = "JMdict_EntryScoreView"."score",
|
||||
"common" = "JMdict_EntryScoreView"."common"
|
||||
FROM "JMdict_EntryScoreView"
|
||||
WHERE "JMdict_EntryScoreView"."entryId" = NEW."entryId"
|
||||
AND "JMdict_EntryScore"."entryId" = NEW."entryId"
|
||||
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
|
||||
END;
|
||||
|
||||
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_JLPTTag"
|
||||
AFTER DELETE ON "JMdict_JLPTTag"
|
||||
BEGIN
|
||||
UPDATE "JMdict_EntryScore"
|
||||
SET
|
||||
"score" = "JMdict_EntryScoreView"."score",
|
||||
"common" = "JMdict_EntryScoreView"."common"
|
||||
FROM "JMdict_EntryScoreView"
|
||||
WHERE "JMdict_EntryScoreView"."entryId" = OLD."entryId"
|
||||
AND "JMdict_EntryScore"."entryId" = OLD."entryId"
|
||||
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
|
||||
END;
|
||||
@@ -6,6 +6,3 @@ CREATE TABLE "RADKFILE" (
|
||||
|
||||
CREATE INDEX "RADK" ON "RADKFILE"("radical");
|
||||
CREATE INDEX "KRAD" ON "RADKFILE"("kanji");
|
||||
|
||||
CREATE VIEW "RADKFILE_Radicals" AS
|
||||
SELECT DISTINCT "radical" FROM "RADKFILE";
|
||||
@@ -1,22 +0,0 @@
|
||||
-- These tables are for optimizing searches.
|
||||
|
||||
-- In order to include results from both, the software should
|
||||
-- first check if the searchword is convertible to kana, and then
|
||||
-- potentially get results from both by doing a union between two
|
||||
-- selects.
|
||||
|
||||
CREATE TABLE "JMdict_EntryByKana" (
|
||||
"kana" TEXT NOT NULL,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
||||
PRIMARY KEY ("kana", "entryId")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana");
|
||||
|
||||
CREATE TABLE "JMdict_EntryByEnglish" (
|
||||
"english" TEXT NOT NULL,
|
||||
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
|
||||
PRIMARY KEY ("english", "entryId")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
|
||||
@@ -6,12 +6,6 @@ CREATE TABLE "KANJIDIC_Character" (
|
||||
"jlpt" INTEGER
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TABLE "KANJIDIC_RadicalName" (
|
||||
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"name" TEXT NOT NULL,
|
||||
PRIMARY KEY("kanji", "name")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TABLE "KANJIDIC_Codepoint" (
|
||||
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')),
|
||||
@@ -22,12 +16,25 @@ CREATE TABLE "KANJIDIC_Codepoint" (
|
||||
CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji");
|
||||
|
||||
CREATE TABLE "KANJIDIC_Radical" (
|
||||
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')),
|
||||
"radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 213)),
|
||||
PRIMARY KEY("kanji", "type")
|
||||
"kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214)
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "KANJIDIC_Radical_byRadicalId" ON "KANJIDIC_Radical"("radicalId");
|
||||
|
||||
CREATE TABLE "KANJIDIC_RadicalNelsonCId" (
|
||||
"radicalId" INTEGER NOT NULL PRIMARY KEY CHECK ("radicalId" BETWEEN 1 AND 214),
|
||||
"nelsonId" INTEGER UNIQUE NOT NULL CHECK ("nelsonId" BETWEEN 1 AND 213)
|
||||
);
|
||||
|
||||
CREATE TABLE "KANJIDIC_RadicalName" (
|
||||
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214),
|
||||
"name" TEXT NOT NULL,
|
||||
PRIMARY KEY("radicalId", "name")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "KANJIDIC_RadicalName_byRadicalId" ON "KANJIDIC_RadicalName"("radicalId");
|
||||
|
||||
CREATE TABLE "KANJIDIC_StrokeMiscount" (
|
||||
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"strokeCount" INTEGER NOT NULL,
|
||||
@@ -106,6 +113,7 @@ CREATE TABLE "KANJIDIC_QueryCode" (
|
||||
"code" VARCHAR(7) NOT NULL,
|
||||
"type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')),
|
||||
"SKIPMisclassification" VARCHAR(15),
|
||||
CHECK ("SKIPMisclassification" IS NULL OR "type" = 'skip'),
|
||||
PRIMARY KEY ("kanji", "type", "code")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
@@ -120,30 +128,39 @@ CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading");
|
||||
|
||||
CREATE TABLE "KANJIDIC_Kunyomi" (
|
||||
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
"yomi" TEXT NOT NULL,
|
||||
"isJouyou" BOOLEAN,
|
||||
UNIQUE("kanji", "orderNum"),
|
||||
PRIMARY KEY ("kanji", "yomi")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "KANJIDIC_Kunyomi_byKanji_byOrderNum" ON "KANJIDIC_Kunyomi"("kanji", "orderNum");
|
||||
CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi");
|
||||
|
||||
CREATE TABLE "KANJIDIC_Onyomi" (
|
||||
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
"yomi" TEXT NOT NULL,
|
||||
"type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')),
|
||||
"isJouyou" BOOLEAN,
|
||||
UNIQUE("kanji", "orderNum"),
|
||||
PRIMARY KEY ("kanji", "yomi")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "KANJIDIC_Onyomi_byKanji_byOrderNum" ON "KANJIDIC_Onyomi"("kanji", "orderNum");
|
||||
CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi");
|
||||
|
||||
CREATE TABLE "KANJIDIC_Meaning" (
|
||||
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
|
||||
"orderNum" INTEGER NOT NULL,
|
||||
"language" CHAR(3) NOT NULL DEFAULT "eng",
|
||||
"meaning" TEXT NOT NULL,
|
||||
UNIQUE("kanji", "orderNum"),
|
||||
PRIMARY KEY ("kanji", "language", "meaning")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "KANJIDIC_Meaning_byKanji_byOrderNum" ON "KANJIDIC_Meaning"("kanji", "orderNum");
|
||||
CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning");
|
||||
|
||||
CREATE TABLE "KANJIDIC_Nanori" (
|
||||
@@ -1,13 +1,11 @@
|
||||
CREATE TABLE "XREF__KANJIDIC_Radical__RADKFILE"(
|
||||
"radicalId" INTEGER NOT NULL,
|
||||
"radicalSymbol" CHAR(1) NOT NULL REFERENCES "RADKFILE"("radical"),
|
||||
"radicalType" VARCHAR(9) NOT NULL CHECK ("radicalType" IN ('classical', 'nelson_c')) DEFAULT 'classical',
|
||||
PRIMARY KEY ("radicalId", "radicalSymbol", "radicalType"),
|
||||
FOREIGN KEY ("radicalId", "radicalType") REFERENCES "KANJIDIC_Radical"("radical", "type")
|
||||
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214),
|
||||
"radicalSymbol" CHAR(1) UNIQUE NOT NULL REFERENCES "RADKFILE"("radical"),
|
||||
PRIMARY KEY ("radicalId", "radicalSymbol")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalId" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalId");
|
||||
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol_byRadicalType" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol", "radicalType");
|
||||
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol");
|
||||
|
||||
/* Source: https://ctext.org/kangxi-zidian */
|
||||
INSERT INTO "XREF__KANJIDIC_Radical__RADKFILE"("radicalId", "radicalSymbol") VALUES
|
||||
89
migrations/0010_Views.sql
Normal file
89
migrations/0010_Views.sql
Normal file
@@ -0,0 +1,89 @@
|
||||
CREATE VIEW "JMdict_EntryByKana"("kana", "entryId")
|
||||
AS
|
||||
SELECT
|
||||
"JMdict_ReadingElement"."reading" AS "kana",
|
||||
"JMdict_ReadingElement"."entryId" AS "entryId"
|
||||
FROM "JMdict_ReadingElement";
|
||||
|
||||
|
||||
CREATE VIEW "JMdict_EntryByEnglish"("english", "entryId")
|
||||
AS
|
||||
SELECT
|
||||
"JMdict_SenseGlossary"."phrase" AS "english",
|
||||
"JMdict_Sense"."senseId" AS "entryId"
|
||||
FROM "JMdict_SenseGlossary" JOIN "JMdict_Sense" USING("senseId");
|
||||
|
||||
|
||||
CREATE VIEW "JMdict_BaseAndFurigana"("entryId", "base", "furigana", "isFirst", "kanjiOrderNum", "readingOrderNum")
|
||||
AS
|
||||
SELECT
|
||||
"JMdict_Entry"."entryId" AS "entryId",
|
||||
CASE WHEN (
|
||||
"JMdict_KanjiElement"."reading" IS NOT NULL
|
||||
AND NOT "JMdict_ReadingElement"."readingDoesNotMatchKanji"
|
||||
)
|
||||
THEN "JMdict_KanjiElement"."reading"
|
||||
ELSE "JMdict_ReadingElement"."reading"
|
||||
END AS "base",
|
||||
CASE WHEN (
|
||||
"JMdict_KanjiElement"."reading" IS NOT NULL
|
||||
AND NOT "JMdict_ReadingElement"."readingDoesNotMatchKanji"
|
||||
)
|
||||
THEN "JMdict_ReadingElement"."reading"
|
||||
ELSE NULL
|
||||
END AS "furigana",
|
||||
COALESCE("JMdict_KanjiElement"."orderNum", 1)
|
||||
+ "JMdict_ReadingElement"."orderNum"
|
||||
= 2
|
||||
AS "isFirst",
|
||||
"JMdict_KanjiElement"."orderNum" AS "kanjiOrderNum",
|
||||
"JMdict_ReadingElement"."orderNum" AS "readingOrderNum"
|
||||
FROM "JMdict_Entry"
|
||||
LEFT JOIN "JMdict_KanjiElement" USING("entryId")
|
||||
LEFT JOIN "JMdict_ReadingElement" USING("entryId");
|
||||
|
||||
CREATE VIEW "JMdict_EntryCommon"("entryId")
|
||||
AS
|
||||
SELECT DISTINCT "entryId"
|
||||
FROM "JMdict_EntryScore"
|
||||
WHERE "JMdict_EntryScore"."common" = 1;
|
||||
|
||||
-- TODO: Make it possible to match words that contain the
|
||||
-- kanji as an infix
|
||||
|
||||
CREATE VIEW "KANJIDIC_ExampleEntries"("kanji", "entryId")
|
||||
AS
|
||||
SELECT
|
||||
"JMdict_KanjiElement"."entryId",
|
||||
"KANJIDIC_Character"."literal" AS "kanji",
|
||||
"JMdict_KanjiElement"."reading"
|
||||
FROM
|
||||
"KANJIDIC_Character"
|
||||
JOIN "JMdict_KanjiElementFTS"
|
||||
ON "JMdict_KanjiElementFTS"."reading" MATCH "KANJIDIC_Character"."literal" || '*'
|
||||
JOIN "JMdict_KanjiElement"
|
||||
ON "JMdict_KanjiElementFTS"."entryId" = "JMdict_KanjiElement"."entryId"
|
||||
AND "JMdict_KanjiElementFTS"."reading" LIKE '%' || "JMdict_KanjiElement"."reading"
|
||||
JOIN "JMdict_EntryScore"
|
||||
ON "JMdict_EntryScore"."type" = 'k'
|
||||
AND "JMdict_KanjiElement"."entryId" = "JMdict_EntryScore"."entryId"
|
||||
AND "JMdict_KanjiElement"."reading" = "JMdict_EntryScore"."reading"
|
||||
WHERE "JMdict_EntryScore"."common" = 1;
|
||||
|
||||
|
||||
CREATE VIEW "RADKFILE_Radicals" AS
|
||||
SELECT DISTINCT "radical" FROM "RADKFILE";
|
||||
|
||||
CREATE VIEW "JMdict_CombinedEntryScore"
|
||||
AS
|
||||
SELECT
|
||||
CASE
|
||||
WHEN "JMdict_EntryScore"."type" = 'k'
|
||||
THEN (SELECT entryId FROM "JMdict_KanjiElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
|
||||
WHEN "JMdict_EntryScore"."type" = 'r'
|
||||
THEN (SELECT entryId FROM "JMdict_ReadingElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
|
||||
END AS "entryId",
|
||||
MAX("JMdict_EntryScore"."score") AS "score",
|
||||
MAX("JMdict_EntryScore"."common") AS "common"
|
||||
FROM "JMdict_EntryScore"
|
||||
GROUP BY "entryId";
|
||||
@@ -7,6 +7,7 @@
|
||||
radkfile,
|
||||
kanjidic2,
|
||||
sqlite,
|
||||
wal ? false,
|
||||
}:
|
||||
stdenvNoCC.mkDerivation {
|
||||
name = "jadb";
|
||||
@@ -16,7 +17,7 @@ stdenvNoCC.mkDerivation {
|
||||
database-tool
|
||||
sqlite
|
||||
];
|
||||
|
||||
|
||||
buildPhase = ''
|
||||
runHook preBuild
|
||||
|
||||
@@ -29,7 +30,9 @@ stdenvNoCC.mkDerivation {
|
||||
sqlite3 jadb.sqlite < "$migration"
|
||||
done
|
||||
|
||||
"${lib.getExe database-tool}" create-db --libsqlite "${sqlite.out}/lib/libsqlite3.so"
|
||||
"${lib.getExe database-tool}" create-db \
|
||||
${lib.optionalString wal "--wal"} \
|
||||
--libsqlite "${sqlite.out}/lib/libsqlite3.so"
|
||||
|
||||
runHook postBuild
|
||||
'';
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
stdenvNoCC.mkDerivation {
|
||||
name = "docs";
|
||||
src = database;
|
||||
|
||||
nativeBuildInputs = [
|
||||
sqlite
|
||||
schemaspy
|
||||
|
||||
@@ -9,16 +9,17 @@
|
||||
stdenvNoCC.mkDerivation {
|
||||
name = "jmdict";
|
||||
|
||||
dontUnpack = true;
|
||||
srcs = [
|
||||
jmdict-src
|
||||
jmdict-with-examples-src
|
||||
];
|
||||
dontUnpack = true;
|
||||
|
||||
nativeBuildInputs = [
|
||||
gzip
|
||||
xmlformat
|
||||
];
|
||||
|
||||
buildPhase = ''
|
||||
runHook preBuild
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ stdenvNoCC.mkDerivation {
|
||||
gzip
|
||||
xmlformat
|
||||
];
|
||||
|
||||
buildPhase = ''
|
||||
runHook preBuild
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ stdenv.mkDerivation {
|
||||
gzip
|
||||
iconv
|
||||
];
|
||||
|
||||
buildPhase = ''
|
||||
runHook preBuild
|
||||
|
||||
|
||||
28
pubspec.lock
28
pubspec.lock
@@ -13,10 +13,10 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: analyzer
|
||||
sha256: "13c1e6c6fd460522ea840abec3f677cc226f5fec7872c04ad7b425517ccf54f7"
|
||||
sha256: "904ae5bb474d32c38fb9482e2d925d5454cda04ddd0e55d2e6826bc72f6ba8c0"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "7.4.4"
|
||||
version: "7.4.5"
|
||||
args:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
@@ -69,10 +69,10 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: coverage
|
||||
sha256: "9086475ef2da7102a0c0a4e37e1e30707e7fb7b6d28c209f559a9c5f8ce42016"
|
||||
sha256: "802bd084fb82e55df091ec8ad1553a7331b61c08251eef19a508b6f3f3a9858d"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.12.0"
|
||||
version: "1.13.1"
|
||||
crypto:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -189,10 +189,10 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: meta
|
||||
sha256: e3641ec5d63ebf0d9b41bd43201a66e3fc79a65db5f61fc181f04cd27aab950c
|
||||
sha256: "23f08335362185a5ea2ad3a4e597f1375e78bce8a040df5c600c8d3552ef2394"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.16.0"
|
||||
version: "1.17.0"
|
||||
mime:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -373,26 +373,26 @@ packages:
|
||||
dependency: "direct dev"
|
||||
description:
|
||||
name: test
|
||||
sha256: "301b213cd241ca982e9ba50266bd3f5bd1ea33f1455554c5abb85d1be0e2d87e"
|
||||
sha256: "0561f3a2cfd33d10232360f16dfcab9351cfb7ad9b23e6cd6e8c7fb0d62c7ac3"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.25.15"
|
||||
version: "1.26.1"
|
||||
test_api:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: test_api
|
||||
sha256: fb31f383e2ee25fbbfe06b40fe21e1e458d14080e3c67e7ba0acfde4df4e0bbd
|
||||
sha256: "522f00f556e73044315fa4585ec3270f1808a4b186c936e612cab0b565ff1e00"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.7.4"
|
||||
version: "0.7.6"
|
||||
test_core:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: test_core
|
||||
sha256: "84d17c3486c8dfdbe5e12a50c8ae176d15e2a771b96909a9442b40173649ccaa"
|
||||
sha256: "8619a9a45be044b71fe2cd6b77b54fd60f1c67904c38d48706e2852a2bda1c60"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.6.8"
|
||||
version: "0.6.10"
|
||||
typed_data:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -429,10 +429,10 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: web_socket
|
||||
sha256: bfe6f435f6ec49cb6c01da1e275ae4228719e59a6b067048c51e72d9d63bcc4b
|
||||
sha256: "34d64019aa8e36bf9842ac014bb5d2f5586ca73df5e4d9bf5c936975cae6982c"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.0"
|
||||
version: "1.0.1"
|
||||
web_socket_channel:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
||||
10
pubspec.yaml
10
pubspec.yaml
@@ -4,15 +4,15 @@ version: 1.0.0
|
||||
homepage: https://git.pvv.ntnu.no/oysteikt/jadb
|
||||
|
||||
environment:
|
||||
sdk: '>=3.0.0 <4.0.0'
|
||||
sdk: '>=3.2.0 <4.0.0'
|
||||
|
||||
dependencies:
|
||||
args: ^2.7.0
|
||||
collection: ^1.19.1
|
||||
collection: ^1.19.0
|
||||
csv: ^6.0.0
|
||||
equatable: ^2.0.7
|
||||
sqflite_common: ^2.5.5
|
||||
sqflite_common_ffi: ^2.3.5
|
||||
equatable: ^2.0.0
|
||||
sqflite_common: ^2.5.0
|
||||
sqflite_common_ffi: ^2.3.0
|
||||
xml: ^6.5.0
|
||||
|
||||
dev_dependencies:
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/util/jouyou_kanji.dart';
|
||||
import 'package:jadb/const_data/kanji_grades.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test("Assert 2136 kanji in jouyou set", () {
|
||||
expect(JOUYOU_KANJI.values.flattenedToSet.length, 2136);
|
||||
expect(JOUYOU_KANJI_BY_GRADES.values.flattenedToSet.length, 2136);
|
||||
});
|
||||
}
|
||||
|
||||
33
test/models/create_empty_db_test.dart
Normal file
33
test/models/create_empty_db_test.dart
Normal file
@@ -0,0 +1,33 @@
|
||||
import 'dart:ffi';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/models/create_empty_db.dart';
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
|
||||
import 'package:test/test.dart';
|
||||
import 'package:sqlite3/open.dart';
|
||||
|
||||
Future<DatabaseExecutor> setup_inmemory_database() async {
|
||||
final libsqlitePath = Platform.environment['LIBSQLITE_PATH'];
|
||||
|
||||
if (libsqlitePath == null) {
|
||||
throw Exception("LIBSQLITE_PATH is not set");
|
||||
}
|
||||
|
||||
final db_connection = await createDatabaseFactoryFfi(
|
||||
ffiInit: () =>
|
||||
open.overrideForAll(() => DynamicLibrary.open(libsqlitePath)),
|
||||
).openDatabase(':memory:');
|
||||
|
||||
return db_connection;
|
||||
}
|
||||
|
||||
void main() {
|
||||
test("Create empty db", () async {
|
||||
final db = await setup_inmemory_database();
|
||||
|
||||
await createEmptyDb(db);
|
||||
|
||||
await db.jadbVerifyTables();
|
||||
});
|
||||
}
|
||||
32
test/search/filter_kanji_test.dart
Normal file
32
test/search/filter_kanji_test.dart
Normal file
@@ -0,0 +1,32 @@
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
import 'setup_database_connection.dart';
|
||||
|
||||
void main() {
|
||||
test("Filter kanji", () async {
|
||||
final connection = await setup_database_connection();
|
||||
|
||||
final result = await connection.filterKanji(
|
||||
[
|
||||
"a",
|
||||
"b",
|
||||
"c",
|
||||
"漢",
|
||||
"字",
|
||||
"地",
|
||||
"字",
|
||||
"か",
|
||||
"な",
|
||||
".",
|
||||
"!",
|
||||
"@",
|
||||
";",
|
||||
"々",
|
||||
],
|
||||
deduplicate: false,
|
||||
);
|
||||
|
||||
expect(result.join(), "漢字地字");
|
||||
});
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
import 'package:jadb/util/jouyou_kanji.dart';
|
||||
import 'package:jadb/const_data/kanji_grades.dart';
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
import 'setup_database_connection.dart';
|
||||
@@ -7,17 +8,17 @@ void main() {
|
||||
test("Search a kanji", () async {
|
||||
final connection = await setup_database_connection();
|
||||
|
||||
final result = await connection.searchKanji('漢');
|
||||
final result = await connection.jadbSearchKanji('漢');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
group("Search all jouyou kanji", () {
|
||||
JOUYOU_KANJI.forEach((grade, characters) {
|
||||
JOUYOU_KANJI_BY_GRADES.forEach((grade, characters) {
|
||||
test("Search all kanji in grade $grade", () async {
|
||||
final connection = await setup_database_connection();
|
||||
|
||||
for (final character in characters) {
|
||||
final result = await connection.searchKanji(character);
|
||||
final result = await connection.jadbSearchKanji(character);
|
||||
expect(result, isNotNull);
|
||||
}
|
||||
}, timeout: Timeout.factor(10));
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/_data_ingestion/open_local_db.dart';
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
Future<JaDBConnection> setup_database_connection() async {
|
||||
Future<Database> setup_database_connection() async {
|
||||
final lib_sqlite_path = Platform.environment['LIBSQLITE_PATH'];
|
||||
final jadb_path = Platform.environment['JADB_PATH'];
|
||||
|
||||
@@ -20,9 +20,5 @@ Future<JaDBConnection> setup_database_connection() async {
|
||||
jadbPath: jadb_path,
|
||||
);
|
||||
|
||||
if (db_connection == null) {
|
||||
throw Exception("Failed to open database");
|
||||
}
|
||||
|
||||
return JaDBConnection(db_connection);
|
||||
return db_connection;
|
||||
}
|
||||
|
||||
@@ -1,12 +1,48 @@
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
import 'setup_database_connection.dart';
|
||||
|
||||
void main() {
|
||||
test("Search a word", () async {
|
||||
test("Search a word - english - auto", () async {
|
||||
final connection = await setup_database_connection();
|
||||
final result = await connection.jadbSearchWord("kana");
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
final result = await connection.searchWord("kana");
|
||||
test("Get word search count - english - auto", () async {
|
||||
final connection = await setup_database_connection();
|
||||
final result = await connection.jadbSearchWordCount("kana");
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test("Search a word - japanese kana - auto", () async {
|
||||
final connection = await setup_database_connection();
|
||||
final result = await connection.jadbSearchWord("かな");
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test("Get word search count - japanese kana - auto", () async {
|
||||
final connection = await setup_database_connection();
|
||||
final result = await connection.jadbSearchWordCount("かな");
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test("Search a word - japanese kanji - auto", () async {
|
||||
final connection = await setup_database_connection();
|
||||
final result = await connection.jadbSearchWord("仮名");
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test("Get word search count - japanese kanji - auto", () async {
|
||||
final connection = await setup_database_connection();
|
||||
final result = await connection.jadbSearchWordCount("仮名");
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test("Get a word by id", () async {
|
||||
final connection = await setup_database_connection();
|
||||
final result = await connection.jadbGetWordById(1577090);
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
@@ -17,7 +53,7 @@ void main() {
|
||||
|
||||
// Test serializing all words
|
||||
for (final letter in "aiueoksthnmyrw".split("")) {
|
||||
await connection.searchWord(letter);
|
||||
await connection.jadbSearchWord(letter);
|
||||
}
|
||||
},
|
||||
timeout: Timeout.factor(100),
|
||||
|
||||
Reference in New Issue
Block a user