81 Commits

Author SHA1 Message Date
781e650f0b WIP: use ids for {kanji,reading}Element tables 2025-06-24 01:01:07 +02:00
78f546fa28 models/createEmptyDb: init 2025-06-23 21:11:42 +02:00
e0a35bdef9 lib: fix file name style 2025-06-22 12:48:23 +02:00
8ec9771222 word_search: add '.' filter for FTS input 2025-06-20 23:27:15 +02:00
e2fe033bf4 word_search: fix english search, filter '%' 2025-06-20 23:23:09 +02:00
a6a48c196d word_search: filter input for FTS chars 2025-06-20 23:10:10 +02:00
26618cc06d test: remove invalid null check 2025-06-20 23:01:33 +02:00
b855a1dc48 filter_kanji: fix implementation, add test 2025-06-20 22:44:26 +02:00
f8813e0ae3 word_search: add function for retrieving single entry by id 2025-06-20 21:57:43 +02:00
bd0fee1b2d lib: move table_names to separate dir 2025-05-31 16:21:59 +02:00
42e7c95f59 lib: format 2025-05-27 20:02:53 +02:00
b25cc85afe lib/search: make JaDBConnection into extension, add verifyTables 2025-05-27 20:02:13 +02:00
ec14016ab5 lib/util/lemmatizer: init 2025-05-26 17:23:49 +02:00
6eee49d2d1 lib/search/word: order english queries by score 2025-05-23 15:28:00 +02:00
f819280268 migrations: combine score tables 2025-05-23 15:27:56 +02:00
03a8e11d91 JMdict_BaseAndFurigana: move isFirst column out of dart query 2025-05-22 19:57:46 +02:00
fdd15df344 lib/search/filter_kanji: init 2025-05-22 19:45:10 +02:00
0ea8331298 nix: clean up formatting 2025-05-22 16:57:07 +02:00
9215807b5c migrations: move RADKFILE view to Views migration 2025-05-22 16:57:07 +02:00
72a58bc299 migrations: fix file indices 2025-05-22 16:57:06 +02:00
c208ef75f2 migrations: precalculate word search score with triggers 2025-05-22 16:57:06 +02:00
bfcb2bfc97 lib/cli: allow specifying query for kanji/word 2025-05-22 16:57:06 +02:00
52a686ac29 lib/search/word: fix english sql query 2025-05-22 16:57:05 +02:00
8bff2c6799 KANJIDIC: add orderNum to yomi + meaning 2025-05-21 12:57:08 +02:00
b8eba03000 lib/search: use const table names 2025-05-21 12:38:25 +02:00
4d75bef208 migrations: create virtual col common on jmdict kanji/reading elements 2025-05-20 13:08:16 +02:00
08f25f6277 lib/models/jmdict_pos: add shortDescription 2025-05-20 00:03:32 +02:00
7fee0435e8 pubspec.lock: bump packages 2025-05-19 21:46:19 +02:00
73640d01f6 lib/search/kanji: make taughtIn into int? 2025-05-19 20:18:36 +02:00
2875f7a65f lib/search/kanji: allow empty names for radicals 2025-05-19 19:48:17 +02:00
155a3aa658 treewide: format 2025-05-19 16:40:37 +02:00
0351b7a9df lib/search: make optional word args available in public api 2025-05-19 16:40:36 +02:00
3cc61e6ba9 .gitignore: add /doc 2025-05-19 16:40:36 +02:00
ebe29db092 lib/search: document public search functions 2025-05-19 16:40:36 +02:00
128dd14a0d lib/search: generate list of ? instead of interpolation 2025-05-19 16:40:35 +02:00
501d3a089e lib/search/radical_search: implement 2025-05-19 16:40:35 +02:00
e0ffd89ff4 lib/search/word_search: use map comprehensions 2025-05-19 16:40:35 +02:00
e30ed8ba9b lib/search/kanji: add more data to result 2025-05-19 16:40:34 +02:00
d508b5e244 migrations: add constraint for kanjidic skip misclassifications 2025-05-19 16:40:34 +02:00
31c3fb807e treewide: redo handling of kanjidic radicals 2025-05-19 16:40:34 +02:00
60d2017473 lib/search/kanji: add codepoints 2025-05-19 16:40:33 +02:00
135d81db80 lib/search/kanji: query kanji parts 2025-05-19 16:40:33 +02:00
f8549bf55a lib/_data_ingestion/kanjidic: correctly parse radical names from xml 2025-05-19 16:40:33 +02:00
69d86b34ff migrations: remove already fixed TODO 2025-05-19 16:40:30 +02:00
6d7c068e7b lib/search/word_search: query reading/kanji elements in correct order 2025-05-18 23:53:30 +02:00
b6661c734f lib/search/word_search: add word count search 2025-05-16 23:50:01 +02:00
080638e7ef lib/search/word_search: assert no missing rubys 2025-05-16 21:30:33 +02:00
03d536d7d1 nix: add package for database with WAL enabled 2025-05-16 21:29:53 +02:00
3f267b78d3 lib: format 2025-05-16 21:06:19 +02:00
84ae7eca9e lib/search/word_search: move some score calculation to virtual column 2025-05-16 21:03:37 +02:00
45c4c5f09a lib/cli/query-word: stringify 2025-05-16 18:47:26 +02:00
369fcdbd4b lib/search/word_search: fix rubification 2025-05-16 18:46:57 +02:00
33cf5028f4 lib/search/word_search: score and order results by several metrics 2025-05-16 18:46:39 +02:00
90d5717928 migrations: move JMdict views to 0009_views 2025-05-16 18:43:58 +02:00
bb9550380c lib/search/word_search: limit entry id query to single column 2025-05-16 17:06:01 +02:00
3680827176 lib/search/word_search: use const table names 2025-05-16 17:06:01 +02:00
9d9ce840fa lib/search/word_search: split 2025-05-16 17:06:00 +02:00
6c580e95e2 lib/search/word_search: pagination 2025-05-16 17:06:00 +02:00
2db99e76f0 lib/_data_ingestion: perform VACUUM after data ingestion 2025-05-16 17:06:00 +02:00
42db69e57a lib/cli/create_db: make WAL mode optional 2025-05-16 17:05:59 +02:00
4407c06f12 lib/cli: add timing logs to query commands 2025-05-16 17:05:59 +02:00
fc0956d5c3 lib/text_filtering: add kanjiRegex 2025-05-16 17:05:57 +02:00
d7f7f9cd19 Filter data out into separate files 2025-05-16 09:22:28 +02:00
cf95f85caa Add kanji filtering utility 2025-05-16 09:17:18 +02:00
f278b34415 lib/_data_ingestion: enable WAL 2025-05-15 22:31:54 +02:00
30d8160698 migrations: add FTS tables + view for querying example words 2025-05-15 13:03:48 +02:00
b07fc8f4b3 lib/word_search: calculate isCommon 2025-05-14 20:59:57 +02:00
8299572225 migrations: add view for base/furigana queries 2025-05-14 17:12:31 +02:00
78ba1bae1a lib/search: query readings for xrefs 2025-05-14 17:12:31 +02:00
87383c8951 lib/search: query jlpt tags on word search 2025-05-14 17:12:31 +02:00
cd9b318956 lib/cli/create_db: await seeding data 2025-05-14 17:12:30 +02:00
96f52b5860 lib/_data_ingestion/tanos-jlpt: report sql errors 2025-05-14 17:12:30 +02:00
59e8db5add Query more detailed information about language source 2025-05-14 17:12:30 +02:00
9038119eb7 Consistently use senseId name everywhere 2025-05-14 17:12:29 +02:00
3290d5dc91 Consistently use entryId name everywhere 2025-05-14 17:12:29 +02:00
4647ab2286 flake.nix: add sqlite-analyzer to devshell 2025-05-14 17:12:29 +02:00
2980bcda06 lib/_data_ingestion/jmdict: format 2025-05-14 17:12:29 +02:00
1661817819 migrations/JMDict: add extra constraints, clean up pkeys, etc. 2025-05-14 17:12:28 +02:00
581f9daf25 lib/_data_ingestion: add phrases for language source data 2025-05-14 17:12:28 +02:00
9898793bca Convert entryBy* tables into views 2025-05-14 17:12:28 +02:00
2e7e8851e1 pubspec.yaml: relax deps 2025-05-13 22:19:02 +02:00
80 changed files with 5743 additions and 3122 deletions

1
.gitignore vendored
View File

@@ -6,6 +6,7 @@
.packages
# Conventional directory for build output.
/doc/
/build/
main.db

View File

@@ -2,6 +2,7 @@ import 'package:args/command_runner.dart';
import 'package:jadb/cli/commands/create_db.dart';
import 'package:jadb/cli/commands/create_tanos_jlpt_mappings.dart';
import 'package:jadb/cli/commands/lemmatize.dart';
import 'package:jadb/cli/commands/query_kanji.dart';
import 'package:jadb/cli/commands/query_word.dart';
@@ -14,6 +15,7 @@ Future<void> main(List<String> args) async {
runner.addCommand(CreateDb());
runner.addCommand(QueryKanji());
runner.addCommand(QueryWord());
runner.addCommand(Lemmatize());
runner.addCommand(CreateTanosJlptMappings());
runner.run(args);

View File

@@ -81,6 +81,7 @@
dart
gnumake
sqlite-interactive
sqlite-analyzer
sqlite-web
sqlint
sqlfluff
@@ -128,6 +129,12 @@
inherit src;
};
database-wal = pkgs.callPackage ./nix/database.nix {
inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2;
inherit src;
wal = true;
};
docs = pkgs.callPackage ./nix/docs.nix {
inherit (self.packages.${system}) database;
};

View File

@@ -97,7 +97,7 @@ class LanguageSource extends SQLWritable {
const LanguageSource({
required this.language,
this.phrase,
required this.phrase,
this.fullyDescribesSense = true,
this.constructedFromSmallerWords = false,
});
@@ -161,7 +161,7 @@ class XRef {
}
class Sense extends SQLWritable {
final int id;
final int senseId;
final int orderNum;
final List<XRefParts> antonyms;
final List<String> dialects;
@@ -176,7 +176,7 @@ class Sense extends SQLWritable {
final List<XRefParts> seeAlso;
const Sense({
required this.id,
required this.senseId,
required this.orderNum,
this.antonyms = const [],
this.dialects = const [],
@@ -193,11 +193,12 @@ class Sense extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'id': id,
'senseId': senseId,
'orderNum': orderNum,
};
bool get isEmpty => antonyms.isEmpty &&
bool get isEmpty =>
antonyms.isEmpty &&
dialects.isEmpty &&
fields.isEmpty &&
info.isEmpty &&
@@ -211,17 +212,17 @@ class Sense extends SQLWritable {
}
class Entry extends SQLWritable {
final int id;
final int entryId;
final List<KanjiElement> kanji;
final List<ReadingElement> readings;
final List<Sense> senses;
const Entry({
required this.id,
required this.entryId,
required this.kanji,
required this.readings,
required this.senses,
});
Map<String, Object?> get sqlValue => {'id': id};
Map<String, Object?> get sqlValue => {'entryId': entryId};
}

View File

@@ -2,8 +2,7 @@ import 'dart:collection';
import 'package:collection/collection.dart';
import 'package:jadb/_data_ingestion/jmdict/objects.dart';
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/util/romaji_transliteration.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:sqflite_common/sqlite_api.dart';
class ResolvedXref {
@@ -72,47 +71,38 @@ ResolvedXref resolveXref(
Future<void> seedJMDictData(List<Entry> entries, Database db) async {
print(' [JMdict] Batch 1 - Kanji and readings');
Batch b = db.batch();
for (final e in entries) {
b.insert(JMdictTableNames.entry, e.sqlValue);
for (final k in e.kanji) {
b.insert(JMdictTableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id}));
// b.insert(
// JMdictTableNames.entryByKana,
// {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)},
// // Some entries have the same reading twice with difference in katakana and hiragana
// conflictAlgorithm: ConflictAlgorithm.ignore,
// );
b.insert(
JMdictTableNames.kanjiElement,
k.sqlValue..addAll({'entryId': e.entryId}),
);
for (final i in k.info) {
b.insert(
JMdictTableNames.kanjiInfo,
{
'entryId': e.id,
'entryId': e.entryId,
'reading': k.reading,
'info': i,
},
);
}
}
for (final r in e.readings) {
b.insert(
JMdictTableNames.readingElement,
r.sqlValue..addAll({'entryId': e.id}),
r.sqlValue..addAll({'entryId': e.entryId}),
);
b.insert(
JMdictTableNames.entryByKana,
{
'entryId': e.id,
'kana': transliterateKanaToLatin(r.reading),
},
// Some entries have the same reading twice with difference in katakana and hiragana
conflictAlgorithm: ConflictAlgorithm.ignore,
);
for (final i in r.info) {
b.insert(
JMdictTableNames.readingInfo,
{
'entryId': e.id,
'entryId': e.entryId,
'reading': r.reading,
'info': i,
},
@@ -122,79 +112,65 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
b.insert(
JMdictTableNames.readingRestriction,
{
'entryId': e.id,
'entryId': e.entryId,
'reading': r.reading,
'restriction': res,
},
);
}
}
for (final s in e.senses) {
for (final g in s.glossary) {
b.insert(
JMdictTableNames.entryByEnglish,
{
'entryId': e.id,
'english': g.phrase,
},
// Some entries have the same reading twice with difference in katakana and hiragana
conflictAlgorithm: ConflictAlgorithm.ignore,
);
}
}
}
await b.commit(noResult: true);
print(' [JMdict] Batch 2 - Senses');
b = db.batch();
for (final e in entries) {
for (final s in e.senses) {
b.insert(JMdictTableNames.sense, s.sqlValue..addAll({'entryId': e.id}));
b.insert(
JMdictTableNames.sense, s.sqlValue..addAll({'entryId': e.entryId}));
for (final d in s.dialects) {
b.insert(JMdictTableNames.senseDialect, {'senseId': s.id, 'dialect': d});
b.insert(
JMdictTableNames.senseDialect,
{'senseId': s.senseId, 'dialect': d},
);
}
for (final f in s.fields) {
b.insert(JMdictTableNames.senseField, {'senseId': s.id, 'field': f});
b.insert(
JMdictTableNames.senseField, {'senseId': s.senseId, 'field': f});
}
for (final i in s.info) {
b.insert(JMdictTableNames.senseInfo, {'senseId': s.id, 'info': i});
b.insert(JMdictTableNames.senseInfo, {'senseId': s.senseId, 'info': i});
}
for (final m in s.misc) {
b.insert(JMdictTableNames.senseMisc, {'senseId': s.id, 'misc': m});
b.insert(JMdictTableNames.senseMisc, {'senseId': s.senseId, 'misc': m});
}
for (final p in s.pos) {
b.insert(JMdictTableNames.sensePOS, {'senseId': s.id, 'pos': p});
}
for (final l in s.languageSource) {
b.insert(
JMdictTableNames.senseLanguageSource,
l.sqlValue..addAll({'senseId': s.id}),
);
b.insert(JMdictTableNames.sensePOS, {'senseId': s.senseId, 'pos': p});
}
for (final rk in s.restrictedToKanji) {
b.insert(
JMdictTableNames.senseRestrictedToKanji,
{'entryId': e.id, 'senseId': s.id, 'kanji': rk},
{'entryId': e.entryId, 'senseId': s.senseId, 'kanji': rk},
);
}
for (final rr in s.restrictedToReading) {
b.insert(
JMdictTableNames.senseRestrictedToReading,
{'entryId': e.id, 'senseId': s.id, 'reading': rr},
{'entryId': e.entryId, 'senseId': s.senseId, 'reading': rr},
);
}
for (final ls in s.languageSource) {
b.insert(
JMdictTableNames.senseLanguageSource,
ls.sqlValue..addAll({'senseId': s.id}),
ls.sqlValue..addAll({'senseId': s.senseId}),
);
}
for (final g in s.glossary) {
b.insert(
JMdictTableNames.senseGlossary,
g.sqlValue..addAll({'senseId': s.id}),
g.sqlValue..addAll({'senseId': s.senseId}),
);
}
}
@@ -204,6 +180,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
print(' [JMdict] Building xref trees');
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
for (final entry in entries) {
for (final kanji in entry.kanji) {
if (entriesByKanji.containsKey(kanji.reading)) {
@@ -239,8 +216,8 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
b.insert(
JMdictTableNames.senseSeeAlso,
{
'senseId': s.id,
'xrefEntryId': resolvedEntry.entry.id,
'senseId': s.senseId,
'xrefEntryId': resolvedEntry.entry.entryId,
'seeAlsoKanji': xref.kanjiRef,
'seeAlsoReading': xref.readingRef,
'seeAlsoSense': xref.senseOrderNum,
@@ -257,8 +234,8 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
);
b.insert(JMdictTableNames.senseAntonyms, {
'senseId': s.id,
'xrefEntryId': resolvedEntry.entry.id,
'senseId': s.senseId,
'xrefEntryId': resolvedEntry.entry.entryId,
'antonymKanji': ant.kanjiRef,
'antonymReading': ant.readingRef,
'antonymSense': ant.senseOrderNum,

View File

@@ -127,7 +127,7 @@ List<Entry> parseJMDictData(XmlElement root) {
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
senseId++;
final result = Sense(
id: senseId,
senseId: senseId,
orderNum: orderNum + 1,
restrictedToKanji:
sense.findElements('stagk').map((e) => e.innerText).toList(),
@@ -151,6 +151,7 @@ List<Entry> parseJMDictData(XmlElement root) {
.map(
(e) => LanguageSource(
language: e.getAttribute('xml:lang') ?? 'eng',
phrase: e.innerText.isNotEmpty ? e.innerText : null,
fullyDescribesSense: e.getAttribute('ls_type') == 'part',
constructedFromSmallerWords: e.getAttribute('ls_wasei') == 'y',
),
@@ -189,7 +190,7 @@ List<Entry> parseJMDictData(XmlElement root) {
entries.add(
Entry(
id: entryId,
entryId: entryId,
kanji: kanjiEls,
readings: readingEls,
senses: senses,

View File

@@ -21,20 +21,17 @@ class CodePoint extends SQLWritable {
class Radical extends SQLWritable {
final String kanji;
final String type;
final String radical;
final int radicalId;
const Radical({
required this.kanji,
required this.type,
required this.radical,
required this.radicalId,
});
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'type': type,
'radical': radical,
'radicalId': radicalId,
};
}
@@ -224,7 +221,7 @@ class Character extends SQLWritable {
final List<String> radicalName;
final List<CodePoint> codepoints;
final List<Radical> radicals;
final Radical? radical;
final List<int> strokeMiscounts;
final List<Variant> variants;
final List<DictionaryReference> dictionaryReferences;
@@ -244,7 +241,7 @@ class Character extends SQLWritable {
this.jlpt,
this.radicalName = const [],
this.codepoints = const [],
this.radicals = const [],
required this.radical,
this.strokeMiscounts = const [],
this.variants = const [],
this.dictionaryReferences = const [],

View File

@@ -1,4 +1,4 @@
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:sqflite_common/sqlite_api.dart';
import 'objects.dart';
@@ -14,14 +14,24 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
// print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
// }
b.insert(KANJIDICTableNames.character, c.sqlValue);
for (final n in c.radicalName) {
b.insert(KANJIDICTableNames.radicalName, {'kanji': c.literal, 'name': n});
assert(c.radical != null, 'Radical name without radical');
b.insert(
KANJIDICTableNames.radicalName,
{
'radicalId': c.radical!.radicalId,
'name': n,
},
conflictAlgorithm: ConflictAlgorithm.ignore,
);
}
for (final cp in c.codepoints) {
b.insert(KANJIDICTableNames.codepoint, cp.sqlValue);
}
for (final r in c.radicals) {
b.insert(KANJIDICTableNames.radical, r.sqlValue);
if (c.radical != null) {
b.insert(KANJIDICTableNames.radical, c.radical!.sqlValue);
}
for (final sm in c.strokeMiscounts) {
b.insert(
@@ -52,14 +62,17 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
for (final r in c.readings) {
b.insert(KANJIDICTableNames.reading, r.sqlValue);
}
for (final k in c.kunyomi) {
b.insert(KANJIDICTableNames.kunyomi, k.sqlValue);
for (final (i, y) in c.kunyomi.indexed) {
b.insert(
KANJIDICTableNames.kunyomi, y.sqlValue..addAll({'orderNum': i + 1}));
}
for (final o in c.onyomi) {
b.insert(KANJIDICTableNames.onyomi, o.sqlValue);
for (final (i, y) in c.onyomi.indexed) {
b.insert(
KANJIDICTableNames.onyomi, y.sqlValue..addAll({'orderNum': i + 1}));
}
for (final m in c.meanings) {
b.insert(KANJIDICTableNames.meaning, m.sqlValue);
for (final (i, m) in c.meanings.indexed) {
b.insert(
KANJIDICTableNames.meaning, m.sqlValue..addAll({'orderNum': i + 1}));
}
for (final n in c.nanori) {
b.insert(

View File

@@ -27,11 +27,8 @@ List<Character> parseKANJIDICData(XmlElement root) {
jlpt: int.tryParse(
misc.findElements('jlpt').firstOrNull?.innerText ?? '',
),
radicalName: radical
?.findElements('rad_name')
.map((e) => e.innerText)
.toList() ??
[],
radicalName:
misc.findElements('rad_name').map((e) => e.innerText).toList(),
codepoints: codepoint
?.findElements('cp_value')
.map(
@@ -43,17 +40,16 @@ List<Character> parseKANJIDICData(XmlElement root) {
)
.toList() ??
[],
radicals: radical
?.findElements('rad_value')
.map(
(e) => Radical(
kanji: kanji,
type: e.getAttribute('rad_type')!,
radical: e.innerText,
),
)
.toList() ??
[],
radical: radical
?.findElements('rad_value')
.where((e) => e.getAttribute('rad_type') == 'classical')
.map(
(e) => Radical(
kanji: kanji,
radicalId: int.parse(e.innerText),
),
)
.firstOrNull,
strokeMiscounts: misc
.findElements('stroke_count')
.skip(1)

View File

@@ -1,10 +1,7 @@
import 'dart:ffi';
import 'dart:io';
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
import 'package:jadb/search.dart';
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
import 'package:sqlite3/open.dart';
@@ -12,7 +9,8 @@ Future<Database> openLocalDb({
String? libsqlitePath,
String? jadbPath,
bool readWrite = false,
bool assertTablesExist = true,
bool verifyTablesExist = true,
bool walMode = false,
}) async {
libsqlitePath ??= Platform.environment['LIBSQLITE_PATH'];
jadbPath ??= Platform.environment['JADB_PATH'];
@@ -41,50 +39,19 @@ Future<Database> openLocalDb({
).openDatabase(
jadbPath,
options: OpenDatabaseOptions(
onOpen: (db) {
db.execute("PRAGMA foreign_keys = ON");
onConfigure: (db) async {
if (walMode) {
await db.execute("PRAGMA journal_mode = WAL");
}
await db.execute("PRAGMA foreign_keys = ON");
},
readOnly: !readWrite,
),
);
if (assertTablesExist) {
await _assertTablesExist(db);
if (verifyTablesExist) {
await db.jadbVerifyTables();
}
return db;
}
Future<void> _assertTablesExist(Database db) async {
final Set<String> tables = await db
.query(
'sqlite_master',
columns: ['name'],
where: 'type = ?',
whereArgs: ['table'],
)
.then((result) {
return result.map((row) => row['name'] as String).toSet();
});
final Set<String> expectedTables = {
...JMdictTableNames.allTables,
...KANJIDICTableNames.allTables,
...RADKFILETableNames.allTables,
...TanosJLPTTableNames.allTables,
};
final missingTables = expectedTables.difference(tables);
if (missingTables.isNotEmpty) {
throw Exception([
'Missing tables:',
missingTables.map((table) => ' - $table').join('\n'),
'',
'Found tables:\n',
tables.map((table) => ' - $table').join('\n'),
'',
'Please ensure the database is correctly set up.',
].join('\n'));
}
}

View File

@@ -1,4 +1,4 @@
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<void> seedRADKFILEData(

View File

@@ -17,6 +17,9 @@ Future<void> seedData(Database db) async {
await parseAndSeedDataFromRADKFILE(db);
await parseAndSeedDataFromKANJIDIC(db);
await parseAndSeedDataFromTanosJLPT(db);
print('Performing VACUUM');
await db.execute('VACUUM');
}
Future<void> parseAndSeedDataFromJMdict(Database db) async {

View File

@@ -3,7 +3,7 @@ abstract class SQLWritable {
const SQLWritable();
/// Returns a map of the object's properties and their values.
///
///
/// Note that there might be properties in the object which is meant to be
/// inserted into a different table. These properties will/should be excluded
/// from this map.

View File

@@ -1,7 +1,6 @@
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/overrides.dart';
import 'package:jadb/util/sqlite_utils.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<List<int>> _findReadingCandidates(
@@ -13,7 +12,8 @@ Future<List<int>> _findReadingCandidates(
JMdictTableNames.readingElement,
columns: ['entryId'],
where:
'reading IN (${word.readings.map((e) => escapeStringValue(e)).join(',')})',
'"reading" IN (${List.filled(word.readings.length, '?').join(',')})',
whereArgs: [...word.readings],
)
.then((rows) => rows.map((row) => row['entryId'] as int).toList());
@@ -34,14 +34,20 @@ Future<List<(int, String)>> _findSenseCandidates(
JLPTRankedWord word,
Database db,
) =>
db
.rawQuery('SELECT entryId, phrase '
'FROM ${JMdictTableNames.senseGlossary} '
'JOIN ${JMdictTableNames.sense} ON ${JMdictTableNames.senseGlossary}.senseId = ${JMdictTableNames.sense}.id '
'WHERE phrase IN (${word.meanings.map((e) => escapeStringValue(e)).join(',')})')
.then((rows) => rows
.map((row) => (row['entryId'] as int, row['phrase'] as String))
.toList());
db.rawQuery(
'SELECT entryId, phrase '
'FROM "${JMdictTableNames.senseGlossary}" '
'JOIN "${JMdictTableNames.sense}" USING (senseId)'
'WHERE phrase IN (${List.filled(
word.meanings.length,
'?',
).join(',')})',
[...word.meanings],
).then(
(rows) => rows
.map((row) => (row['entryId'] as int, row['phrase'] as String))
.toList(),
);
Future<int?> findEntry(
JLPTRankedWord word,
@@ -123,7 +129,9 @@ Future<Map<String, Set<int>>> resolveAllEntries(
if (resolved != null) {
result[word.jlptLevel]!.add(resolved);
}
} catch (e) {}
} catch (e) {
print('ERROR: $e');
}
}
return result;

View File

@@ -1,4 +1,4 @@
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<void> seedTanosJLPTData(

View File

@@ -12,6 +12,15 @@ class CreateDb extends Command {
CreateDb() {
addLibsqliteArg(argParser);
argParser.addFlag(
'wal',
help: '''Whether to use Write-Ahead Logging (WAL) mode.
This is recommended for better performance, but may not be used with
the readonly NixOS store.
''',
defaultsTo: false,
);
}
Future<void> run() async {
@@ -22,10 +31,11 @@ class CreateDb extends Command {
final db = await openLocalDb(
libsqlitePath: argResults!.option('libsqlite')!,
walMode: argResults!.flag('wal'),
readWrite: true,
);
seedData(db).then((_) {
await seedData(db).then((_) {
print("Database created successfully");
}).catchError((error) {
print("Error creating database: $error");

View File

@@ -0,0 +1,46 @@
// import 'dart:io';
// import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/cli/args.dart';
import 'package:args/command_runner.dart';
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
class Lemmatize extends Command {
final name = "lemmatize";
final description = "Lemmatize a word using the Jadb lemmatizer";
Lemmatize() {
addLibsqliteArg(argParser);
addJadbArg(argParser);
argParser.addOption(
'word',
abbr: 'w',
help: 'The word to search for.',
valueHelp: 'WORD',
);
}
Future<void> run() async {
// if (argResults!.option('libsqlite') == null ||
// argResults!.option('jadb') == null) {
// print(argParser.usage);
// exit(64);
// }
// final db = await openLocalDb(
// jadbPath: argResults!.option('jadb')!,
// libsqlitePath: argResults!.option('libsqlite')!,
// );
final String searchWord = argResults!.option('word') ?? '食べたくない';
final time = Stopwatch()..start();
final result = lemmatize(searchWord);
time.stop();
print(result.toString());
print("Lemmatization took ${time.elapsedMilliseconds}ms");
}
}

View File

@@ -1,4 +1,3 @@
import 'dart:convert';
import 'dart:io';
@@ -15,6 +14,12 @@ class QueryKanji extends Command {
QueryKanji() {
addLibsqliteArg(argParser);
addJadbArg(argParser);
argParser.addOption(
'kanji',
abbr: 'k',
help: 'The kanji to search for.',
valueHelp: 'KANJI',
);
}
Future<void> run() async {
@@ -29,12 +34,18 @@ class QueryKanji extends Command {
libsqlitePath: argResults!.option('libsqlite')!,
);
final result = await JaDBConnection(db).searchKanji('');
final time = Stopwatch()..start();
final result = await JaDBConnection(db).jadbSearchKanji(
argResults!.option('kanji') ?? '',
);
time.stop();
if (result == null) {
print("No such kanji");
} else {
print(JsonEncoder.withIndent(' ').convert(result.toJson()));
}
print("Query took ${time.elapsedMilliseconds}ms");
}
}

View File

@@ -1,5 +1,3 @@
import 'dart:convert';
import 'dart:io';
import 'package:jadb/_data_ingestion/open_local_db.dart';
@@ -15,6 +13,12 @@ class QueryWord extends Command {
QueryWord() {
addLibsqliteArg(argParser);
addJadbArg(argParser);
argParser.addOption(
'word',
abbr: 'w',
help: 'The word to search for.',
valueHelp: 'WORD',
);
}
Future<void> run() async {
@@ -29,15 +33,29 @@ class QueryWord extends Command {
libsqlitePath: argResults!.option('libsqlite')!,
);
final result = await JaDBConnection(db).searchWord('kana');
final String searchWord = argResults!.option('word') ?? 'かな';
final time = Stopwatch()..start();
final count = await JaDBConnection(db).jadbSearchWordCount(searchWord);
time.stop();
final time2 = Stopwatch()..start();
final result = await JaDBConnection(db).jadbSearchWord(searchWord);
time2.stop();
if (result == null) {
print("Invalid search");
} else if (result.isEmpty) {
print("No matches");
} else {
print(JsonEncoder.withIndent(' ')
.convert(result.map((e) => e.toJson()).toList()));
for (final e in result) {
print(e.toString());
print("");
}
}
print("Total count: ${count}");
print("Count query took ${time.elapsedMilliseconds}ms");
print("Query took ${time2.elapsedMilliseconds}ms");
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,217 @@
const Map<int, List<String>> RADICALS = {
1: ['', '', '', '', '', ''],
2: [
'',
'',
'',
'',
'𠆢',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'𠂉'
],
3: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'广',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
''
],
4: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
''
],
5: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
''
],
6: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'西'
],
7: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
''
],
8: ['', '', '', '', '', '', '', '', '', '', '', ''],
9: ['', '', '', '', '', '', '', '', '', '', ''],
10: ['', '', '', '', '', '', '', '', '', ''],
11: ['', '', '', '鹿', '', '', '', '', ''],
12: ['', '', '', ''],
13: ['', '', '', ''],
14: ['', ''],
17: [''],
};

View File

@@ -1 +1,55 @@
enum JlptLevel { none, n5, n4, n3, n2, n1 }
enum JlptLevel implements Comparable<JlptLevel> {
none,
n1,
n2,
n3,
n4,
n5;
factory JlptLevel.fromString(String? level) {
switch (level?.toUpperCase()) {
case 'N1':
return JlptLevel.n1;
case 'N2':
return JlptLevel.n2;
case 'N3':
return JlptLevel.n3;
case 'N4':
return JlptLevel.n4;
case 'N5':
return JlptLevel.n5;
default:
return JlptLevel.none;
}
}
String? toNullableString() {
switch (this) {
case JlptLevel.n1:
return 'N1';
case JlptLevel.n2:
return 'N2';
case JlptLevel.n3:
return 'N3';
case JlptLevel.n4:
return 'N4';
case JlptLevel.n5:
return 'N5';
case JlptLevel.none:
return null;
}
}
int? get asInt =>
this == JlptLevel.none ? null : JlptLevel.values.indexOf(this);
String toString() => toNullableString() ?? 'N/A';
Object? toJson() => toNullableString();
factory JlptLevel.fromJson(Object? json) =>
JlptLevel.fromString(json as String?);
@override
int compareTo(JlptLevel other) => index - other.index;
}

View File

@@ -0,0 +1,27 @@
import 'dart:io';
import 'dart:isolate';
import 'package:path/path.dart';
import 'package:sqflite_common/sqlite_api.dart';
String migrationDirPath() {
final packageUri = Uri.parse('package:jadb/');
final packagePath = Isolate.resolvePackageUriSync(packageUri);
return packagePath!.resolve('../migrations').toFilePath();
}
Future<void> createEmptyDb(DatabaseExecutor db) async {
List<String> migrationFiles = [];
for (final file in Directory(migrationDirPath()).listSync()) {
if (file is File && file.path.endsWith('.sql')) {
migrationFiles.add(file.path);
}
}
migrationFiles.sort((a, b) => basename(a).compareTo(basename(b)));
for (final file in migrationFiles) {
final sql = await File(file).readAsString();
await db.execute(sql);
}
}

View File

@@ -24,8 +24,7 @@ enum JMdictDialect {
required this.description,
});
static JMdictDialect fromId(String id) =>
JMdictDialect.values.firstWhere(
static JMdictDialect fromId(String id) => JMdictDialect.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);

View File

@@ -107,8 +107,7 @@ enum JMdictField {
required this.description,
});
static JMdictField fromId(String id) =>
JMdictField.values.firstWhere(
static JMdictField fromId(String id) => JMdictField.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);

View File

@@ -18,8 +18,7 @@ enum JMdictKanjiInfo {
required this.description,
});
static JMdictKanjiInfo fromId(String id) =>
JMdictKanjiInfo.values.firstWhere(
static JMdictKanjiInfo fromId(String id) => JMdictKanjiInfo.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);

View File

@@ -79,8 +79,7 @@ enum JMdictMisc {
required this.description,
});
static JMdictMisc fromId(String id) =>
JMdictMisc.values.firstWhere(
static JMdictMisc fromId(String id) => JMdictMisc.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);

View File

@@ -7,14 +7,21 @@ enum JMdictPOS {
adjIx(id: 'adj-ix', description: 'adjective (keiyoushi) - yoi/ii class'),
adjKari(id: 'adj-kari', description: '\'kari\' adjective (archaic)'),
adjKu(id: 'adj-ku', description: '\'ku\' adjective (archaic)'),
adjNa(id: 'adj-na', description: 'adjectival nouns or quasi-adjectives (keiyodoshi)'),
adjNa(
id: 'adj-na',
description: 'adjectival nouns or quasi-adjectives (keiyodoshi)',
),
adjNari(id: 'adj-nari', description: 'archaic/formal form of na-adjective'),
adjNo(id: 'adj-no', description: 'nouns which may take the genitive case particle ''no'''),
adjNo(
id: 'adj-no',
description: 'nouns which may take the genitive case particle \'no\'',
shortDescription: 'Na-adjective (keiyodoshi)',
),
adjPn(id: 'adj-pn', description: 'pre-noun adjectival (rentaishi)'),
adjShiku(id: 'adj-shiku', description: '\'shiku\' adjective (archaic)'),
adjT(id: 'adj-t', description: '\'taru\' adjective'),
adv(id: 'adv', description: 'adverb (fukushi)'),
advTo(id: 'adv-to', description: 'adverb taking the ''to'' particle'),
advTo(id: 'adv-to', description: 'adverb taking the \'to\' particle'),
aux(id: 'aux', description: 'auxiliary'),
auxAdj(id: 'aux-adj', description: 'auxiliary adjective'),
auxV(id: 'aux-v', description: 'auxiliary verb'),
@@ -23,7 +30,11 @@ enum JMdictPOS {
ctr(id: 'ctr', description: 'counter'),
exp(id: 'exp', description: 'expressions (phrases, clauses, etc.)'),
int(id: 'int', description: 'interjection (kandoushi)'),
n(id: 'n', description: 'noun (common) (futsuumeishi)'),
n(
id: 'n',
description: 'noun (common) (futsuumeishi)',
shortDescription: 'noun',
),
nAdv(id: 'n-adv', description: 'adverbial noun (fukushitekimeishi)'),
nPr(id: 'n-pr', description: 'proper noun'),
nPref(id: 'n-pref', description: 'noun, used as a prefix'),
@@ -38,73 +49,159 @@ enum JMdictPOS {
vUnspec(id: 'v-unspec', description: 'verb unspecified'),
v1(id: 'v1', description: 'Ichidan verb'),
v1S(id: 'v1-s', description: 'Ichidan verb - kureru special class'),
v2aS(id: 'v2a-s', description: 'Nidan verb with ''u'' ending (archaic)'),
v2bK(id: 'v2b-k', description: 'Nidan verb (upper class) with ''bu'' ending (archaic)'),
v2bS(id: 'v2b-s', description: 'Nidan verb (lower class) with ''bu'' ending (archaic)'),
v2dK(id: 'v2d-k', description: 'Nidan verb (upper class) with ''dzu'' ending (archaic)'),
v2dS(id: 'v2d-s', description: 'Nidan verb (lower class) with ''dzu'' ending (archaic)'),
v2gK(id: 'v2g-k', description: 'Nidan verb (upper class) with ''gu'' ending (archaic)'),
v2gS(id: 'v2g-s', description: 'Nidan verb (lower class) with ''gu'' ending (archaic)'),
v2hK(id: 'v2h-k', description: 'Nidan verb (upper class) with ''hu/fu'' ending (archaic)'),
v2hS(id: 'v2h-s', description: 'Nidan verb (lower class) with ''hu/fu'' ending (archaic)'),
v2kK(id: 'v2k-k', description: 'Nidan verb (upper class) with ''ku'' ending (archaic)'),
v2kS(id: 'v2k-s', description: 'Nidan verb (lower class) with ''ku'' ending (archaic)'),
v2mK(id: 'v2m-k', description: 'Nidan verb (upper class) with ''mu'' ending (archaic)'),
v2mS(id: 'v2m-s', description: 'Nidan verb (lower class) with ''mu'' ending (archaic)'),
v2nS(id: 'v2n-s', description: 'Nidan verb (lower class) with ''nu'' ending (archaic)'),
v2rK(id: 'v2r-k', description: 'Nidan verb (upper class) with ''ru'' ending (archaic)'),
v2rS(id: 'v2r-s', description: 'Nidan verb (lower class) with ''ru'' ending (archaic)'),
v2sS(id: 'v2s-s', description: 'Nidan verb (lower class) with ''su'' ending (archaic)'),
v2tK(id: 'v2t-k', description: 'Nidan verb (upper class) with ''tsu'' ending (archaic)'),
v2tS(id: 'v2t-s', description: 'Nidan verb (lower class) with ''tsu'' ending (archaic)'),
v2wS(id: 'v2w-s', description: 'Nidan verb (lower class) with ''u'' ending and ''we'' conjugation (archaic)'),
v2yK(id: 'v2y-k', description: 'Nidan verb (upper class) with ''yu'' ending (archaic)'),
v2yS(id: 'v2y-s', description: 'Nidan verb (lower class) with ''yu'' ending (archaic)'),
v2zS(id: 'v2z-s', description: 'Nidan verb (lower class) with ''zu'' ending (archaic)'),
v4b(id: 'v4b', description: 'Yodan verb with ''bu'' ending (archaic)'),
v4g(id: 'v4g', description: 'Yodan verb with ''gu'' ending (archaic)'),
v4h(id: 'v4h', description: 'Yodan verb with ''hu/fu'' ending (archaic)'),
v4k(id: 'v4k', description: 'Yodan verb with ''ku'' ending (archaic)'),
v4m(id: 'v4m', description: 'Yodan verb with ''mu'' ending (archaic)'),
v4n(id: 'v4n', description: 'Yodan verb with ''nu'' ending (archaic)'),
v4r(id: 'v4r', description: 'Yodan verb with ''ru'' ending (archaic)'),
v4s(id: 'v4s', description: 'Yodan verb with ''su'' ending (archaic)'),
v4t(id: 'v4t', description: 'Yodan verb with ''tsu'' ending (archaic)'),
v2aS(id: 'v2a-s', description: 'Nidan verb with \'u\' ending (archaic)'),
v2bK(
id: 'v2b-k',
description: 'Nidan verb (upper class) with \'bu\' ending (archaic)',
),
v2bS(
id: 'v2b-s',
description: 'Nidan verb (lower class) with \'bu\' ending (archaic)',
),
v2dK(
id: 'v2d-k',
description: 'Nidan verb (upper class) with \'dzu\' ending (archaic)',
),
v2dS(
id: 'v2d-s',
description: 'Nidan verb (lower class) with \'dzu\' ending (archaic)',
),
v2gK(
id: 'v2g-k',
description: 'Nidan verb (upper class) with \'gu\' ending (archaic)',
),
v2gS(
id: 'v2g-s',
description: 'Nidan verb (lower class) with \'gu\' ending (archaic)',
),
v2hK(
id: 'v2h-k',
description: 'Nidan verb (upper class) with \'hu/fu\' ending (archaic)',
),
v2hS(
id: 'v2h-s',
description: 'Nidan verb (lower class) with \'hu/fu\' ending (archaic)',
),
v2kK(
id: 'v2k-k',
description: 'Nidan verb (upper class) with \'ku\' ending (archaic)',
),
v2kS(
id: 'v2k-s',
description: 'Nidan verb (lower class) with \'ku\' ending (archaic)',
),
v2mK(
id: 'v2m-k',
description: 'Nidan verb (upper class) with \'mu\' ending (archaic)',
),
v2mS(
id: 'v2m-s',
description: 'Nidan verb (lower class) with \'mu\' ending (archaic)',
),
v2nS(
id: 'v2n-s',
description: 'Nidan verb (lower class) with \'nu\' ending (archaic)',
),
v2rK(
id: 'v2r-k',
description: 'Nidan verb (upper class) with \'ru\' ending (archaic)',
),
v2rS(
id: 'v2r-s',
description: 'Nidan verb (lower class) with \'ru\' ending (archaic)',
),
v2sS(
id: 'v2s-s',
description: 'Nidan verb (lower class) with \'su\' ending (archaic)',
),
v2tK(
id: 'v2t-k',
description: 'Nidan verb (upper class) with \'tsu\' ending (archaic)',
),
v2tS(
id: 'v2t-s',
description: 'Nidan verb (lower class) with \'tsu\' ending (archaic)',
),
v2wS(
id: 'v2w-s',
description:
'Nidan verb (lower class) with \'u\' ending and \'we\' conjugation (archaic)',
),
v2yK(
id: 'v2y-k',
description: 'Nidan verb (upper class) with \'yu\' ending (archaic)',
),
v2yS(
id: 'v2y-s',
description: 'Nidan verb (lower class) with \'yu\' ending (archaic)',
),
v2zS(
id: 'v2z-s',
description: 'Nidan verb (lower class) with \'zu\' ending (archaic)',
),
v4b(id: 'v4b', description: 'Yodan verb with \'bu\' ending (archaic)'),
v4g(id: 'v4g', description: 'Yodan verb with \'gu\' ending (archaic)'),
v4h(id: 'v4h', description: 'Yodan verb with \'hu/fu\' ending (archaic)'),
v4k(id: 'v4k', description: 'Yodan verb with \'ku\' ending (archaic)'),
v4m(id: 'v4m', description: 'Yodan verb with \'mu\' ending (archaic)'),
v4n(id: 'v4n', description: 'Yodan verb with \'nu\' ending (archaic)'),
v4r(id: 'v4r', description: 'Yodan verb with \'ru\' ending (archaic)'),
v4s(id: 'v4s', description: 'Yodan verb with \'su\' ending (archaic)'),
v4t(id: 'v4t', description: 'Yodan verb with \'tsu\' ending (archaic)'),
v5aru(id: 'v5aru', description: 'Godan verb - -aru special class'),
v5b(id: 'v5b', description: 'Godan verb with ''bu'' ending'),
v5g(id: 'v5g', description: 'Godan verb with ''gu'' ending'),
v5k(id: 'v5k', description: 'Godan verb with ''ku'' ending'),
v5b(id: 'v5b', description: 'Godan verb with \'bu\' ending'),
v5g(id: 'v5g', description: 'Godan verb with \'gu\' ending'),
v5k(id: 'v5k', description: 'Godan verb with \'ku\' ending'),
v5kS(id: 'v5k-s', description: 'Godan verb - Iku/Yuku special class'),
v5m(id: 'v5m', description: 'Godan verb with ''mu'' ending'),
v5n(id: 'v5n', description: 'Godan verb with ''nu'' ending'),
v5r(id: 'v5r', description: 'Godan verb with ''ru'' ending'),
v5rI(id: 'v5r-i', description: 'Godan verb with ''ru'' ending (irregular verb)'),
v5s(id: 'v5s', description: 'Godan verb with ''su'' ending'),
v5t(id: 'v5t', description: 'Godan verb with ''tsu'' ending'),
v5u(id: 'v5u', description: 'Godan verb with ''u'' ending'),
v5uS(id: 'v5u-s', description: 'Godan verb with ''u'' ending (special class)'),
v5uru(id: 'v5uru', description: 'Godan verb - Uru old class verb (old form of Eru)'),
v5m(id: 'v5m', description: 'Godan verb with \'mu\' ending'),
v5n(id: 'v5n', description: 'Godan verb with \'nu\' ending'),
v5r(id: 'v5r', description: 'Godan verb with \'ru\' ending'),
v5rI(
id: 'v5r-i',
description: 'Godan verb with \'ru\' ending (irregular verb)',
),
v5s(id: 'v5s', description: 'Godan verb with \'su\' ending'),
v5t(id: 'v5t', description: 'Godan verb with \'tsu\' ending'),
v5u(id: 'v5u', description: 'Godan verb with \'u\' ending'),
v5uS(
id: 'v5u-s',
description: 'Godan verb with \'u\' ending (special class)',
),
v5uru(
id: 'v5uru',
description: 'Godan verb - Uru old class verb (old form of Eru)',
),
vi(id: 'vi', description: 'intransitive verb'),
vk(id: 'vk', description: 'Kuru verb - special class'),
vn(id: 'vn', description: 'irregular nu verb'),
vr(id: 'vr', description: 'irregular ru verb, plain form ends with -ri'),
vs(id: 'vs', description: 'noun or participle which takes the aux. verb suru'),
vsC(id: 'vs-c', description: 'suru verb - precursor to the modern suru'),
vs(
id: 'vs',
description: 'noun or participle which takes the aux. verb suru',
shortDescription: 'suru verb',
),
vsC(id: 'vs-c', description: 'su verb - precursor to the modern suru'),
vsI(id: 'vs-i', description: 'suru verb - included'),
vsS(id: 'vs-s', description: 'suru verb - special class'),
vt(id: 'vt', description: 'transitive verb'),
vz(id: 'vz', description: 'Ichidan verb - zuru verb (alternative form of -jiru verbs)');
vz(
id: 'vz',
description: 'Ichidan verb - zuru verb (alternative form of -jiru verbs)',
);
final String id;
final String description;
final String? _shortDescription;
const JMdictPOS({
required this.id,
required this.description,
});
String? shortDescription,
}) : _shortDescription = shortDescription;
static JMdictPOS fromId(String id) =>
JMdictPOS.values.firstWhere(
String get shortDescription => _shortDescription ?? description;
static JMdictPOS fromId(String id) => JMdictPOS.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);

View File

@@ -4,37 +4,48 @@ class KanjiSearchRadical extends Equatable {
/// The radical symbol.
final String symbol;
/// The names of this radical.
///
/// Each name might refer to a specific form of the radical.
final List<String> names;
/// The radical forms used in this kanji.
///
/// (e.g. "亻" for "人", "氵" for "水")
final List<String> forms;
/// The meaning of the radical.
final String meaning;
/// The meanings of the radical.
final List<String> meanings;
// ignore: public_member_api_docs
const KanjiSearchRadical({
required this.symbol,
this.forms = const [],
required this.meaning,
required this.names,
required this.forms,
required this.meanings,
});
@override
List<Object> get props => [
symbol,
this.names,
forms,
meaning,
meanings,
];
Map<String, dynamic> toJson() => {
'symbol': symbol,
'names': names,
'forms': forms,
'meaning': meaning,
'meanings': meanings,
};
factory KanjiSearchRadical.fromJson(Map<String, dynamic> json) {
return KanjiSearchRadical(
symbol: json['symbol'] as String,
names: (json['names'] as List).map((e) => e as String).toList(),
forms: (json['forms'] as List).map((e) => e as String).toList(),
meaning: json['meaning'] as String,
meanings: (json['meanings'] as List).map((e) => e as String).toList(),
);
}
}

View File

@@ -7,7 +7,8 @@ class KanjiSearchResult extends Equatable {
final String kanji;
/// The school level that the kanji is taught in, if applicable.
final String? taughtIn;
/// Ranges from `1` to `10` (except 7)
final int? taughtIn;
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
///
@@ -38,23 +39,51 @@ class KanjiSearchResult extends Equatable {
/// Information about this character's radical, if applicable.
final KanjiSearchRadical? radical;
// TODO: document more accurately what kind of parts?
/// The parts used in this kanji.
/// All radicals/kanji parts that make up this kanji.
///
/// Note that this list might not always be complete.
final List<String> parts;
/// Ids for the kanji's symbol in different encoding systems
/// (e.g. JIS213, JIS208, UCS, etc.)
final Map<String, String> codepoints;
/// The kanji's nanori readings.
///
/// Nanori readings are special readings of kanji used in names.
final List<String> nanori;
/// How to read this kanji in different languages.
final Map<String, List<String>> alternativeLanguageReadings;
/// Common miscounts of the kanji's strokes.
final List<int> strokeMiscounts;
/// Query codes for looking up this kanji in different indexing systems.
final Map<String, List<String>> queryCodes;
/// References to other dictionaries that contain this kanji.
final Map<String, String> dictionaryReferences;
const KanjiSearchResult({
required this.kanji,
this.taughtIn,
this.jlptLevel,
this.newspaperFrequencyRank,
required this.taughtIn,
required this.jlptLevel,
required this.newspaperFrequencyRank,
required this.strokeCount,
required this.meanings,
this.kunyomi = const [],
this.onyomi = const [],
required this.kunyomi,
required this.onyomi,
// this.kunyomiExamples = const [],
// this.onyomiExamples = const [],
this.radical,
this.parts = const [],
required this.radical,
required this.parts,
required this.codepoints,
required this.nanori,
required this.alternativeLanguageReadings,
required this.strokeMiscounts,
required this.queryCodes,
required this.dictionaryReferences,
});
@override
@@ -71,6 +100,13 @@ class KanjiSearchResult extends Equatable {
// onyomiExamples,
radical,
parts,
codepoints,
kanji,
nanori,
alternativeLanguageReadings,
strokeMiscounts,
queryCodes,
dictionaryReferences,
];
Map<String, dynamic> toJson() => {
@@ -86,16 +122,18 @@ class KanjiSearchResult extends Equatable {
// 'kunyomiExamples': kunyomiExamples,
'radical': radical?.toJson(),
'parts': parts,
// 'strokeOrderDiagramUri': strokeOrderDiagramUri,
// 'strokeOrderSvgUri': strokeOrderSvgUri,
// 'strokeOrderGifUri': strokeOrderGifUri,
// 'uri': uri,
'codepoints': codepoints,
'nanori': nanori,
'alternativeLanguageReadings': alternativeLanguageReadings,
'strokeMiscounts': strokeMiscounts,
'queryCodes': queryCodes,
'dictionaryReferences': dictionaryReferences,
};
factory KanjiSearchResult.fromJson(Map<String, dynamic> json) {
return KanjiSearchResult(
kanji: json['kanji'] as String,
taughtIn: json['taughtIn'] as String?,
taughtIn: json['taughtIn'] as int?,
jlptLevel: json['jlptLevel'] as String?,
newspaperFrequencyRank: json['newspaperFrequencyRank'] as int?,
strokeCount: json['strokeCount'] as int,
@@ -112,6 +150,29 @@ class KanjiSearchResult extends Equatable {
? KanjiSearchRadical.fromJson(json['radical'])
: null,
parts: (json['parts'] as List).map((e) => e as String).toList(),
codepoints: (json['codepoints'] as Map<String, dynamic>).map(
(key, value) => MapEntry(key, value as String),
),
nanori: (json['nanori'] as List).map((e) => e as String).toList(),
alternativeLanguageReadings:
(json['alternativeLanguageReadings'] as Map<String, dynamic>).map(
(key, value) => MapEntry(
key,
(value as List).map((e) => e as String).toList(),
),
),
strokeMiscounts:
(json['strokeMiscounts'] as List).map((e) => e as int).toList(),
queryCodes: (json['queryCodes'] as Map<String, dynamic>).map(
(key, value) => MapEntry(
key,
(value as List).map((e) => e as String).toList(),
),
),
dictionaryReferences:
(json['dictionaryReferences'] as Map<String, dynamic>).map(
(key, value) => MapEntry(key, value as String),
),
);
}
}

View File

@@ -1,3 +0,0 @@
/// Placeholder for the result of searching kanji by their radicals.
///
/// NOTE(review): currently an empty stub with no fields or behavior;
/// presumably it will carry the matched kanji — confirm intended shape
/// before implementing.
class RadicalsSearchResult {
  // TODO: implement me
}

View File

@@ -0,0 +1,39 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// Verifies that [db] contains every table required by JaDB.
///
/// Queries `sqlite_master` for all tables and compares them against the
/// expected table names of the JMdict, KANJIDIC, RADKFILE and Tanos JLPT
/// schemas. Throws an [Exception] listing the missing (and found) tables
/// if any expected table is absent.
Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
  final rows = await db.query(
    'sqlite_master',
    columns: ['name'],
    where: 'type = ?',
    whereArgs: ['table'],
  );
  final Set<String> tables =
      rows.map((row) => row['name'] as String).toSet();

  final Set<String> expectedTables = {
    ...JMdictTableNames.allTables,
    ...KANJIDICTableNames.allTables,
    ...RADKFILETableNames.allTables,
    ...TanosJLPTTableNames.allTables,
  };

  final missingTables = expectedTables.difference(tables);
  if (missingTables.isNotEmpty) {
    // 'Found tables:' previously carried a stray '\n', producing a double
    // blank line; both section headers are now formatted identically.
    throw Exception([
      'Missing tables:',
      missingTables.map((table) => ' - $table').join('\n'),
      '',
      'Found tables:',
      tables.map((table) => ' - $table').join('\n'),
      '',
      'Please ensure the database is correctly set up.',
    ].join('\n'));
  }
}

View File

@@ -1,3 +1,4 @@
import 'package:jadb/models/common/jlpt_level.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
@@ -6,9 +7,15 @@ import 'package:jadb/models/word_search/word_search_sources.dart';
/// A class representing a single dictionary entry from a word search.
class WordSearchResult {
/// The score of the entry, used for sorting results.
final int score;
/// The ID of the entry in the database.
final int entryId;
/// Whether the word is common or not.
final bool isCommon;
/// The variants of the word in Japanese.
final List<WordSearchRuby> japanese;
@@ -21,32 +28,43 @@ class WordSearchResult {
/// The meanings of the word, including parts of speech and other information.
final List<WordSearchSense> senses;
/// The JLPT level of the word.
final JlptLevel jlptLevel;
/// A class listing the sources used to make up the data for this word search result.
final WordSearchSources sources;
const WordSearchResult({
required this.score,
required this.entryId,
required this.isCommon,
required this.japanese,
required this.kanjiInfo,
required this.readingInfo,
required this.senses,
required this.jlptLevel,
required this.sources,
});
Map<String, dynamic> toJson() => {
'_score': score,
'entryId': entryId,
'isCommon': isCommon,
'japanese': japanese.map((e) => e.toJson()).toList(),
'kanjiInfo':
kanjiInfo.map((key, value) => MapEntry(key, value.toJson())),
'readingInfo':
readingInfo.map((key, value) => MapEntry(key, value.toJson())),
'senses': senses.map((e) => e.toJson()).toList(),
'jlptLevel': jlptLevel.toJson(),
'sources': sources.toJson(),
};
factory WordSearchResult.fromJson(Map<String, dynamic> json) =>
WordSearchResult(
score: json['_score'] as int,
entryId: json['entryId'] as int,
isCommon: json['isCommon'] as bool,
japanese: (json['japanese'] as List<dynamic>)
.map((e) => WordSearchRuby.fromJson(e))
.toList(),
@@ -59,6 +77,24 @@ class WordSearchResult {
senses: (json['senses'] as List<dynamic>)
.map((e) => WordSearchSense.fromJson(e))
.toList(),
jlptLevel: JlptLevel.fromJson(json['jlptLevel'] as Object?),
sources: WordSearchSources.fromJson(json['sources']),
);
String _formatJapaneseWord(WordSearchRuby word) =>
word.furigana == null ? word.base : "${word.base} (${word.furigana})";
@override
String toString() {
final japaneseWord = _formatJapaneseWord(japanese[0]);
final isCommonString = isCommon ? '(C)' : '';
final jlptLevelString = "(${jlptLevel.toString()})";
return '''
${score} | [$entryId] $japaneseWord $isCommonString $jlptLevelString
Other forms: ${japanese.skip(1).map(_formatJapaneseWord).join(', ')}
Senses: ${senses.map((s) => s.englishDefinitions).join(', ')}
'''
.trim();
}
}

View File

@@ -2,6 +2,7 @@ import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
class WordSearchSense {
@@ -38,7 +39,7 @@ class WordSearchSense {
// TODO: there is a lot more info to collect in the languageSource data
/// Information about the the origin of the word, if loaned from another language.
final List<String> languageSource;
final List<WordSearchSenseLanguageSource> languageSource;
// TODO: add example sentences
@@ -106,6 +107,8 @@ class WordSearchSense {
misc:
(json['misc'] as List).map((e) => JMdictMisc.fromJson(e)).toList(),
info: List<String>.from(json['info']),
languageSource: List<String>.from(json['languageSource']),
languageSource: (json['languageSource'] as List)
.map((e) => WordSearchSenseLanguageSource.fromJson(e))
.toList(),
);
}

View File

@@ -0,0 +1,30 @@
/// A reference to a foreign language where this sense originates from.
class WordSearchSenseLanguageSource {
  /// The code of the source language.
  final String language;

  /// The word or phrase in the source language, if known.
  final String? phrase;

  /// Whether the source phrase fully describes this sense.
  final bool fullyDescribesSense;

  /// Whether the loanword was constructed from smaller foreign words.
  final bool constructedFromSmallerWords;

  // ignore: public_member_api_docs
  const WordSearchSenseLanguageSource({
    required this.language,
    this.phrase,
    this.fullyDescribesSense = true,
    this.constructedFromSmallerWords = false,
  });

  /// Serializes this object to a JSON-compatible map.
  Map<String, Object?> toJson() => {
        'language': language,
        'phrase': phrase,
        'fullyDescribesSense': fullyDescribesSense,
        'constructedFromSmallerWords': constructedFromSmallerWords,
      };

  /// Deserializes an instance from [json].
  ///
  /// Missing optional keys fall back to the same defaults as the
  /// constructor. Values are explicitly cast instead of relying on
  /// implicit `dynamic` assignment, so malformed JSON fails loudly here.
  factory WordSearchSenseLanguageSource.fromJson(Map<String, dynamic> json) =>
      WordSearchSenseLanguageSource(
        language: json['language'] as String,
        phrase: json['phrase'] as String?,
        fullyDescribesSense: json['fullyDescribesSense'] as bool? ?? true,
        constructedFromSmallerWords:
            json['constructedFromSmallerWords'] as bool? ?? false,
      );
}

View File

@@ -3,6 +3,12 @@ class WordSearchXrefEntry {
/// The ID of the entry that this entry cross-references to.
final int entryId;
/// The base word of the cross-referenced entry.
final String baseWord;
/// The furigana of the cross-referenced entry, if any.
final String? furigana;
/// Whether the entryId was ambiguous during the creation of the
/// database (and hence might be incorrect).
final bool ambiguous;
@@ -10,16 +16,22 @@ class WordSearchXrefEntry {
const WordSearchXrefEntry({
required this.entryId,
required this.ambiguous,
required this.baseWord,
required this.furigana,
});
Map<String, dynamic> toJson() => {
'entryId': entryId,
'ambiguous': ambiguous,
'baseWord': baseWord,
'furigana': furigana,
};
factory WordSearchXrefEntry.fromJson(Map<String, dynamic> json) =>
WordSearchXrefEntry(
entryId: json['entryId'] as int,
ambiguous: json['ambiguous'] as bool,
baseWord: json['baseWord'] as String,
furigana: json['furigana'] as String?,
);
}

View File

@@ -1,25 +1,66 @@
import 'package:jadb/models/verify_tables.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
import 'package:jadb/models/radkfile/radicals_search_result.dart';
import 'package:jadb/search/word_search.dart';
import 'package:jadb/search/filter_kanji.dart';
import 'package:jadb/search/radical_search.dart';
import 'package:jadb/search/word_search/word_search.dart';
import 'package:jadb/search/kanji_search.dart';
import 'package:sqflite_common/sqlite_api.dart';
class JaDBConnection {
final DatabaseExecutor _connection;
extension JaDBConnection on DatabaseExecutor {
/// Ensure that the database contain all JaDB tables.
///
/// This will throw an exception if any of the tables are missing.
Future<void> jadbVerifyTables() => verifyTablesWithDbConnection(this);
const JaDBConnection(this._connection);
/// Search for a kanji in the database.
Future<KanjiSearchResult?> jadbSearchKanji(String kanji) =>
searchKanjiWithDbConnection(this, kanji);
Future<KanjiSearchResult?> searchKanji(String kanji) async =>
searchKanjiWithDbConnection(this._connection, kanji);
/// Filter a list of characters, and return the ones that are listed in the kanji dictionary.
Future<List<String>> filterKanji(
List<String> kanji, {
bool deduplicate = false,
}) =>
filterKanjiWithDbConnection(this, kanji, deduplicate);
Future<RadicalsSearchResult> searchKanjiByRadicals(
List<String> radicals) async {
throw UnimplementedError();
}
/// Search for a word in the database.
Future<List<WordSearchResult>?> jadbSearchWord(
String word, {
SearchMode searchMode = SearchMode.Auto,
int page = 0,
int pageSize = 10,
}) =>
searchWordWithDbConnection(
this,
word,
searchMode,
page,
pageSize,
);
Future<List<WordSearchResult>?> searchWord(String word) async =>
searchWordWithDbConnection(this._connection, word);
///
Future<WordSearchResult?> jadbGetWordById(int id) =>
getWordByIdWithDbConnection(this, id);
/// Search for a word in the database, and return the count of results.
Future<int?> jadbSearchWordCount(
String word, {
SearchMode searchMode = SearchMode.Auto,
}) =>
searchWordCountWithDbConnection(this, word, searchMode);
/// Given a list of radicals, search which kanji contains all
/// of the radicals, find their other radicals, and return those.
/// This is used to figure out which remaining combinations of radicals
/// the user can search for without getting zero results.
Future<List<String>> jadbSearchRemainingRadicals(List<String> radicals) =>
searchRemainingRadicalsWithDbConnection(this, radicals);
/// Given a list of radicals, search which kanji contains all
/// of the radicals, and return those.
Future<List<String>> jadbSearchKanjiByRadicals(List<String> radicals) =>
searchKanjiByRadicalsWithDbConnection(this, radicals);
}

View File

@@ -0,0 +1,23 @@
import 'package:jadb/table_names/kanjidic.dart';
import 'package:sqflite_common/sqflite.dart';
/// Filters [kanji], keeping only the characters that exist in the
/// KANJIDIC character table of [connection].
///
/// When [deduplicate] is true, each matching character appears once in the
/// result; otherwise the original order and multiplicity of [kanji] are
/// preserved.
Future<List<String>> filterKanjiWithDbConnection(
  DatabaseExecutor connection,
  List<String> kanji,
  bool deduplicate,
) async {
  // An empty `IN ()` clause is a SQLite syntax error, so return early
  // instead of issuing a malformed query.
  if (kanji.isEmpty) {
    return [];
  }

  final placeholders = List.filled(kanji.length, '?').join(',');
  final rows = await connection.rawQuery(
    '''
    SELECT "literal"
    FROM "${KANJIDICTableNames.character}"
    WHERE "literal" IN ($placeholders)
    ''',
    kanji,
  );
  final Set<String> filteredKanji =
      rows.map((e) => e['literal'] as String).toSet();

  return deduplicate
      ? filteredKanji.toList()
      : kanji.where(filteredKanji.contains).toList();
}

View File

@@ -1,3 +1,7 @@
import 'package:collection/collection.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/models/kanji_search/kanji_search_radical.dart';
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
import 'package:sqflite_common/sqflite.dart';
@@ -7,94 +11,109 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
) async {
late final List<Map<String, Object?>> characters;
final characters_query = connection.query(
"KANJIDIC_Character",
where: "KANJIDIC_Character.literal = ?",
KANJIDICTableNames.character,
where: "literal = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> codepoints;
final codepoints_query = connection.query(
"KANJIDIC_Codepoint",
where: "KANJIDIC_Codepoint.kanji = ?",
KANJIDICTableNames.codepoint,
where: "kanji = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> kunyomis;
final kunyomis_query = connection.query(
"KANJIDIC_Kunyomi",
where: "KANJIDIC_Kunyomi.kanji = ?",
KANJIDICTableNames.kunyomi,
where: "kanji = ?",
whereArgs: [kanji],
orderBy: "orderNum",
);
late final List<Map<String, Object?>> onyomis;
final onyomis_query = connection.query(
"KANJIDIC_Onyomi",
where: "KANJIDIC_Onyomi.kanji = ?",
KANJIDICTableNames.onyomi,
where: "kanji = ?",
whereArgs: [kanji],
orderBy: "orderNum",
);
late final List<Map<String, Object?>> meanings;
final meanings_query = connection.query(
"KANJIDIC_Meaning",
where: "KANJIDIC_Meaning.kanji = ? AND KANJIDIC_Meaning.language = ?",
KANJIDICTableNames.meaning,
where: "kanji = ? AND language = ?",
whereArgs: [kanji, 'eng'],
orderBy: "orderNum",
);
late final List<Map<String, Object?>> nanoris;
final nanoris_query = connection.query(
"KANJIDIC_Nanori",
where: "KANJIDIC_Nanori.kanji = ?",
KANJIDICTableNames.nanori,
where: "kanji = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> dictionary_references;
final dictionary_references_query = connection.query(
"KANJIDIC_DictionaryReference",
where: "KANJIDIC_DictionaryReference.kanji = ?",
KANJIDICTableNames.dictionaryReference,
where: "kanji = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> query_codes;
final query_codes_query = connection.query(
"KANJIDIC_QueryCode",
where: "KANJIDIC_QueryCode.kanji = ?",
KANJIDICTableNames.queryCode,
where: "kanji = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> radicals;
final radicals_query = connection.query(
"KANJIDIC_Radical",
where: "KANJIDIC_Radical.kanji = ?",
whereArgs: [kanji],
final radicals_query = connection.rawQuery(
'''
SELECT DISTINCT
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
"names"
FROM "${KANJIDICTableNames.radical}"
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
LEFT JOIN (
SELECT "radicalId", group_concat("name") AS "names"
FROM "${KANJIDICTableNames.radicalName}"
GROUP BY "radicalId"
) USING ("radicalId")
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
''',
[kanji],
);
late final List<Map<String, Object?>> radical_names;
final radical_names_query = connection.query(
"KANJIDIC_RadicalName",
where: "KANJIDIC_RadicalName.kanji = ?",
late final List<Map<String, Object?>> parts;
final parts_query = connection.query(
RADKFILETableNames.radkfile,
where: "kanji = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> readings;
final readings_query = connection.query(
"KANJIDIC_Reading",
where: "KANJIDIC_Reading.kanji = ?",
KANJIDICTableNames.reading,
where: "kanji = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> stroke_miscounts;
final stroke_miscounts_query = connection.query(
"KANJIDIC_StrokeMiscount",
where: "KANJIDIC_StrokeMiscount.kanji = ?",
KANJIDICTableNames.strokeMiscount,
where: "kanji = ?",
whereArgs: [kanji],
);
late final List<Map<String, Object?>> variants;
final variants_query = connection.query(
"KANJIDIC_Variant",
where: "KANJIDIC_Variant.kanji = ?",
whereArgs: [kanji],
);
// TODO: add variant data to result
// late final List<Map<String, Object?>> variants;
// final variants_query = connection.query(
// KANJIDICTableNames.variant,
// where: "kanji = ?",
// whereArgs: [kanji],
// );
// TODO: Search for kunyomi and onyomi usage of the characters
// from JMDict. We'll need to fuzzy aquery JMDict_KanjiElement for mathces,
@@ -116,26 +135,54 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
dictionary_references_query.then((value) => dictionary_references = value),
query_codes_query.then((value) => query_codes = value),
radicals_query.then((value) => radicals = value),
radical_names_query.then((value) => radical_names = value),
parts_query.then((value) => parts = value),
readings_query.then((value) => readings = value),
stroke_miscounts_query.then((value) => stroke_miscounts = value),
variants_query.then((value) => variants = value),
// variants_query.then((value) => variants = value),
});
final entry = characters.first;
final String? grade = {
1: 'grade 1',
2: 'grade 2',
3: 'grade 3',
4: 'grade 4',
5: 'grade 5',
6: 'grade 6',
7: 'grade 7',
8: 'grade 8',
9: 'grade 9',
10: 'grade 10',
}[entry['grade'] as int?];
assert(radicals.length <= 1, 'There should be at most one radical per kanji');
final radical = radicals.isNotEmpty
? KanjiSearchRadical(
symbol: radicals.first['symbol'] as String,
names: (radicals.first['names'] as String?)?.split(',') ?? [],
// TODO: add radical form data
forms: [],
// TODO: add radical meaning data
meanings: [],
)
: null;
final alternativeLanguageReadings = readings
.groupListsBy(
(item) => item['type'] as String,
)
.map(
(key, value) => MapEntry(
key,
value.map((item) => item['reading'] as String).toList(),
),
);
// TODO: Add `SKIPMisclassification` to the entries
final queryCodes = query_codes
.groupListsBy(
(item) => item['type'] as String,
)
.map(
(key, value) => MapEntry(
key,
value.map((item) => item['code'] as String).toList(),
),
);
// TODO: Add `volume` and `page` to the entries
final dictionaryReferences = {
for (final entry in dictionary_references)
entry['type'] as String: entry['ref'] as String,
};
final String? jlptLevel = {
5: 'N5',
@@ -147,12 +194,24 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
return KanjiSearchResult(
kanji: entry['literal']! as String,
taughtIn: grade,
taughtIn: entry['grade'] as int?,
jlptLevel: jlptLevel,
newspaperFrequencyRank: entry['frequency'] as int?,
strokeCount: entry['strokeCount'] as int,
meanings: meanings.map((item) => item['meaning'] as String).toList(),
kunyomi: kunyomis.map((item) => item['yomi'] as String).toList(),
parts: parts.map((item) => item['radical'] as String).toList(),
onyomi: onyomis.map((item) => item['yomi'] as String).toList(),
radical: radical,
codepoints: {
for (final codepoint in codepoints)
codepoint['type'] as String: codepoint['codepoint'] as String,
},
nanori: nanoris.map((item) => item['nanori'] as String).toList(),
alternativeLanguageReadings: alternativeLanguageReadings,
strokeMiscounts:
stroke_miscounts.map((item) => item['strokeCount'] as int).toList(),
queryCodes: queryCodes,
dictionaryReferences: dictionaryReferences,
);
}

View File

@@ -0,0 +1,55 @@
import 'package:jadb/table_names/radkfile.dart';
import 'package:sqflite_common/sqlite_api.dart';
// TODO: validate that the list of radicals all are valid radicals

/// Returns the distinct radicals that still co-occur with every radical in
/// [radicals], i.e. the radicals found in at least one kanji that contains
/// all of the given radicals.
///
/// The input radicals themselves are included in the result (they trivially
/// occur in every matching kanji). Returns an empty list for an empty input:
/// an empty `IN ()` list is a syntax error in SQLite, so we short-circuit
/// instead of issuing the query.
Future<List<String>> searchRemainingRadicalsWithDbConnection(
  DatabaseExecutor connection,
  List<String> radicals,
) async {
  if (radicals.isEmpty) {
    return [];
  }
  final placeholders = List.filled(radicals.length, '?').join(',');
  final queryResult = await connection.rawQuery(
    '''
    SELECT DISTINCT "radical"
    FROM "${RADKFILETableNames.radkfile}"
    WHERE "kanji" IN (
      SELECT "kanji"
      FROM "${RADKFILETableNames.radkfile}"
      WHERE "radical" IN ($placeholders)
      GROUP BY "kanji"
      -- A kanji qualifies only when it contains every requested radical.
      HAVING COUNT(DISTINCT "radical") = ?
    )
    ''',
    [
      ...radicals,
      radicals.length,
    ],
  );
  return queryResult.map((row) => row['radical'] as String).toList();
}
/// Returns every kanji that contains all of the radicals in [radicals].
///
/// Returns an empty list for an empty input: an empty `IN ()` list is a
/// syntax error in SQLite, so we short-circuit instead of issuing the query.
Future<List<String>> searchKanjiByRadicalsWithDbConnection(
  DatabaseExecutor connection,
  List<String> radicals,
) async {
  if (radicals.isEmpty) {
    return [];
  }
  final placeholders = List.filled(radicals.length, '?').join(',');
  final queryResult = await connection.rawQuery(
    '''
    SELECT "kanji"
    FROM "${RADKFILETableNames.radkfile}"
    WHERE "radical" IN ($placeholders)
    GROUP BY "kanji"
    -- A kanji qualifies only when it contains every requested radical.
    HAVING COUNT(DISTINCT "radical") = ?
    ''',
    [
      ...radicals,
      radicals.length,
    ],
  );
  return queryResult.map((row) => row['kanji'] as String).toList();
}

View File

@@ -1,503 +0,0 @@
import 'package:collection/collection.dart';
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
import 'package:jadb/util/sqlite_utils.dart';
import 'package:sqflite_common/sqlite_api.dart';
// TODO: Support globs
// TODO: Support tags
// TODO: Prefer original kana type when sorting results
// TODO: Support mixing kana and romaji
/// Searches JMdict for entries matching [word] and regroups the flat SQL
/// rows into [WordSearchResult]s.
///
/// When [isKana] is true the lookup is a kana prefix match against
/// `JMdict_EntryByKana`; otherwise an English prefix match against
/// `JMdict_EntryByEnglish`.
///
/// Returns `null` for an empty query, `[]` when nothing matches, and the
/// regrouped results otherwise.
Future<List<WordSearchResult>?> searchWordWithDbConnection(
  DatabaseExecutor connection,
  String word, {
  bool isKana = true,
}) async {
  if (word.isEmpty) {
    return null;
  }
  // Resolve the query string to candidate entry ids first; every follow-up
  // query below is scoped to these ids.
  late final List<int> entryIds;
  if (isKana) {
    entryIds = (await connection.query(
      'JMdict_EntryByKana',
      where: 'kana LIKE ?',
      whereArgs: ['$word%'],
    ))
        .map((row) => row['entryId'] as int)
        .toList();
  } else {
    entryIds = (await connection.query(
      'JMdict_EntryByEnglish',
      where: 'english LIKE ?',
      whereArgs: ['$word%'],
    ))
        .map((row) => row['entryId'] as int)
        .toList();
  }
  if (entryIds.isEmpty) {
    return [];
  }
  // The ids are ints read from the DB, so interpolating them into the WHERE
  // clause is injection-safe (same pattern for senseIds below).
  late final List<Map<String, Object?>> senses;
  final Future<List<Map<String, Object?>>> senses_query = connection.query(
    'JMdict_Sense',
    where: 'entryId IN (${entryIds.join(',')})',
  );
  late final List<Map<String, Object?>> readingElements;
  final Future<List<Map<String, Object?>>> readingElements_query =
      connection.query(
    'JMdict_ReadingElement',
    where: 'entryId IN (${entryIds.join(',')})',
  );
  late final List<Map<String, Object?>> kanjiElements;
  final Future<List<Map<String, Object?>>> kanjiElements_query =
      connection.query(
    'JMdict_KanjiElement',
    where: 'entryId IN (${entryIds.join(',')})',
  );
  // First wave: the three parent tables run concurrently.
  await Future.wait([
    senses_query.then((value) => senses = value),
    readingElements_query.then((value) => readingElements = value),
    kanjiElements_query.then((value) => kanjiElements = value),
  ]);
  // Second wave: all sense/reading/kanji child tables, again concurrently.
  // Sense queries
  final senseIds = senses.map((element) => element['id'] as int).toList();
  late final List<Map<String, Object?>> senseAntonyms;
  final Future<List<Map<String, Object?>>> senseAntonyms_query =
      connection.query(
    'JMdict_SenseAntonym',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseDialects;
  final Future<List<Map<String, Object?>>> senseDialects_query =
      connection.query(
    'JMdict_SenseDialect',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseFields;
  final Future<List<Map<String, Object?>>> senseFields_query = connection.query(
    'JMdict_SenseField',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseGlossaries;
  final Future<List<Map<String, Object?>>> senseGlossaries_query =
      connection.query(
    'JMdict_SenseGlossary',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseInfos;
  final Future<List<Map<String, Object?>>> senseInfos_query = connection.query(
    'JMdict_SenseInfo',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseLanguageSources;
  final Future<List<Map<String, Object?>>> senseLanguageSources_query =
      connection.query(
    'JMdict_SenseLanguageSource',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseMiscs;
  final Future<List<Map<String, Object?>>> senseMiscs_query = connection.query(
    'JMdict_SenseMisc',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> sensePOSs;
  final Future<List<Map<String, Object?>>> sensePOSs_query = connection.query(
    'JMdict_SensePOS',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseRestrictedToKanjis;
  final Future<List<Map<String, Object?>>> senseRestrictedToKanjis_query =
      connection.query(
    'JMdict_SenseRestrictedToKanji',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseRestrictedToReadings;
  final Future<List<Map<String, Object?>>> senseRestrictedToReadings_query =
      connection.query(
    'JMdict_SenseRestrictedToReading',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> senseSeeAlsos;
  final Future<List<Map<String, Object?>>> senseSeeAlsos_query =
      connection.query(
    'JMdict_SenseSeeAlso',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  late final List<Map<String, Object?>> exampleSentences;
  final Future<List<Map<String, Object?>>> exampleSentences_query =
      connection.query(
    'JMdict_ExampleSentence',
    where: 'senseId IN (${senseIds.join(',')})',
  );
  // Reading queries
  // NOTE(review): the (entryId, reading) pairs are Dart records interpolated
  // via Record.toString() to form SQL row values — this relies on that
  // string shape and on escapeStringValue quoting correctly; fragile, verify.
  final readingIds = readingElements
      .map((element) => (
            element['entryId'] as int,
            escapeStringValue(element['reading'] as String)
          ))
      .toList();
  late final List<Map<String, Object?>> readingElementInfos;
  final Future<List<Map<String, Object?>>> readingElementInfos_query =
      connection.query(
    'JMdict_ReadingElementInfo',
    where: '(entryId, reading) IN (${readingIds.join(',')})',
  );
  late final List<Map<String, Object?>> readingElementRestrictions;
  final Future<List<Map<String, Object?>>> readingElementRestrictions_query =
      connection.query(
    'JMdict_ReadingElementRestriction',
    where: '(entryId, reading) IN (${readingIds.join(',')})',
  );
  // Kanji queries
  // Kanji elements store their written form in the 'reading' column too.
  final kanjiIds = kanjiElements
      .map((element) => (
            element['entryId'] as int,
            escapeStringValue(element['reading'] as String)
          ))
      .toList();
  late final List<Map<String, Object?>> kanjiElementInfos;
  final Future<List<Map<String, Object?>>> kanjiElementInfos_query =
      connection.query(
    'JMdict_KanjiElementInfo',
    where: '(entryId, reading) IN (${kanjiIds.join(',')})',
  );
  await Future.wait([
    senseAntonyms_query.then((value) => senseAntonyms = value),
    senseDialects_query.then((value) => senseDialects = value),
    senseFields_query.then((value) => senseFields = value),
    senseGlossaries_query.then((value) => senseGlossaries = value),
    senseInfos_query.then((value) => senseInfos = value),
    senseLanguageSources_query.then((value) => senseLanguageSources = value),
    senseMiscs_query.then((value) => senseMiscs = value),
    sensePOSs_query.then((value) => sensePOSs = value),
    senseRestrictedToKanjis_query
        .then((value) => senseRestrictedToKanjis = value),
    senseRestrictedToReadings_query
        .then((value) => senseRestrictedToReadings = value),
    senseSeeAlsos_query.then((value) => senseSeeAlsos = value),
    exampleSentences_query.then((value) => exampleSentences = value),
    readingElementInfos_query.then((value) => readingElementInfos = value),
    readingElementRestrictions_query
        .then((value) => readingElementRestrictions = value),
    kanjiElementInfos_query.then((value) => kanjiElementInfos = value),
  ]);
  // Regroup the flat row lists into per-entry results.
  return _regroupWordSearchResults(
    entryIds: entryIds,
    readingElements: readingElements,
    kanjiElements: kanjiElements,
    senses: senses,
    senseAntonyms: senseAntonyms,
    senseDialects: senseDialects,
    senseFields: senseFields,
    senseGlossaries: senseGlossaries,
    senseInfos: senseInfos,
    senseLanguageSources: senseLanguageSources,
    senseMiscs: senseMiscs,
    sensePOSs: sensePOSs,
    senseRestrictedToKanjis: senseRestrictedToKanjis,
    senseRestrictedToReadings: senseRestrictedToReadings,
    senseSeeAlsos: senseSeeAlsos,
    exampleSentences: exampleSentences,
    readingElementInfos: readingElementInfos,
    readingElementRestrictions: readingElementRestrictions,
    kanjiElementInfos: kanjiElementInfos,
  );
}
/// Regroups flat per-table row lists into one [WordSearchResult] per entry
/// id, preserving the order of [entryIds].
List<WordSearchResult> _regroupWordSearchResults({
  required List<int> entryIds,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
  required List<Map<String, Object?>> kanjiElementInfos,
}) {
  // Builds the result for a single entry by slicing out its rows and
  // delegating the heavy lifting to the word/sense regroupers.
  WordSearchResult buildEntry(int entryId) {
    final entryReadings = [
      for (final row in readingElements)
        if (row['entryId'] == entryId) row,
    ];
    final entryKanji = [
      for (final row in kanjiElements)
        if (row['entryId'] == entryId) row,
    ];
    final entrySenses = [
      for (final row in senses)
        if (row['entryId'] == entryId) row,
    ];
    final groupedWords = _regroup_words(
      entryId: entryId,
      readingElements: entryReadings,
      kanjiElements: entryKanji,
      readingElementInfos: readingElementInfos,
      readingElementRestrictions: readingElementRestrictions,
      kanjiElementInfos: kanjiElementInfos,
    );
    final groupedSenses = _regroup_senses(
      senses: entrySenses,
      senseAntonyms: senseAntonyms,
      senseDialects: senseDialects,
      senseFields: senseFields,
      senseGlossaries: senseGlossaries,
      senseInfos: senseInfos,
      senseLanguageSources: senseLanguageSources,
      senseMiscs: senseMiscs,
      sensePOSs: sensePOSs,
      senseRestrictedToKanjis: senseRestrictedToKanjis,
      senseRestrictedToReadings: senseRestrictedToReadings,
      senseSeeAlsos: senseSeeAlsos,
      exampleSentences: exampleSentences,
    );
    return WordSearchResult(
      entryId: entryId,
      japanese: groupedWords.rubys,
      kanjiInfo: groupedWords.kanjiInfos,
      readingInfo: groupedWords.readingInfos,
      senses: groupedSenses,
      sources: const WordSearchSources(
        jmdict: true,
        jmnedict: false,
      ),
    );
  }

  return entryIds.map(buildEntry).toList();
}
/// Per-entry word data regrouped from flat SQL rows by [_regroup_words].
class GroupedWordResult {
  /// Base/furigana pairs representing the entry's written forms.
  final List<WordSearchRuby> rubys;

  /// Reading string -> info tag for that reading element.
  final Map<String, JMdictReadingInfo> readingInfos;

  /// Kanji string -> info tag for that kanji element.
  final Map<String, JMdictKanjiInfo> kanjiInfos;

  const GroupedWordResult({
    required this.rubys,
    required this.readingInfos,
    required this.kanjiInfos,
  });
}
/// Regroups flat kanji/reading rows into the ruby pairs and per-element info
/// maps for a single entry.
///
/// Readings flagged `doesNotMatchKanji` are emitted standalone; all other
/// readings are paired with each kanji element they are allowed to annotate
/// (honoring reading restrictions).
GroupedWordResult _regroup_words({
  required int entryId,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> kanjiElementInfos,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
}) {
  final List<WordSearchRuby> result = [];
  final kanjiElements_ =
      kanjiElements.where((element) => element['entryId'] == entryId).toList();
  final readingElements_ = readingElements
      .where((element) => element['entryId'] == entryId)
      .toList();
  final readingElementRestrictions_ = readingElementRestrictions
      .where((element) => element['entryId'] == entryId)
      .toList();
  for (final readingElement in readingElements_) {
    for (final kanjiElement in kanjiElements_) {
      final kanji = kanjiElement['reading'] as String;
      final reading = readingElement['reading'] as String;
      final doesNotMatchKanji = readingElement['doesNotMatchKanji'] == 1;
      if (doesNotMatchKanji) {
        // This reading never annotates kanji; it is emitted standalone below.
        continue;
      }
      final restrictions = readingElementRestrictions_
          .where((element) => element['reading'] == reading)
          .toList();
      if (restrictions.isNotEmpty &&
          !restrictions.any((element) => element['restriction'] == kanji)) {
        // The reading is restricted to specific kanji forms, and this kanji
        // is not one of them.
        continue;
      }
      final ruby = WordSearchRuby(
        base: kanji,
        furigana: reading,
      );
      result.add(ruby);
    }
  }
  // Standalone readings explicitly flagged as not matching any kanji.
  for (final readingElement
      in readingElements_.where((e) => e['doesNotMatchKanji'] == 1)) {
    final reading = readingElement['reading'] as String;
    final ruby = WordSearchRuby(
      base: reading,
    );
    result.add(ruby);
  }
  // Fix: kana-only entries have no kanji elements at all, so the pairing
  // loop above produced nothing and their unflagged readings were dropped,
  // leaving the entry with no written form. Emit them standalone.
  if (kanjiElements_.isEmpty) {
    for (final readingElement
        in readingElements_.where((e) => e['doesNotMatchKanji'] != 1)) {
      result.add(
        WordSearchRuby(
          base: readingElement['reading'] as String,
        ),
      );
    }
  }
  return GroupedWordResult(
    rubys: result,
    // Fix: the caller passes the info lists for *all* entries; restrict the
    // maps to this entry's rows so unrelated entries' readings cannot leak
    // into (or clobber keys of) this entry's info maps.
    readingInfos: Map.fromEntries(
      readingElementInfos
          .where((e) => e['entryId'] == entryId)
          .map((e) => MapEntry(
                e['reading'] as String,
                JMdictReadingInfo.fromId(e['info'] as String),
              )),
    ),
    kanjiInfos: Map.fromEntries(
      kanjiElementInfos
          .where((e) => e['entryId'] == entryId)
          .map((e) => MapEntry(
                e['reading'] as String,
                JMdictKanjiInfo.fromId(e['info'] as String),
              )),
    ),
  );
}
/// Regroups flat per-sense child rows into [WordSearchSense] objects, one
/// per row of [senses], preserving sense order.
///
/// Each child list is first bucketed by `senseId`, then the buckets for each
/// sense are mapped onto the typed model (enum ids resolved via the
/// respective `fromId` constructors).
List<WordSearchSense> _regroup_senses({
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
}) {
  // Bucket every child table by senseId once, up front, so the per-sense
  // loop below is O(1) per lookup instead of re-scanning each list.
  final groupedSenseAntonyms =
      senseAntonyms.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseDialects =
      senseDialects.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseFields =
      senseFields.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseGlossaries =
      senseGlossaries.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseInfos =
      senseInfos.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseLanguageSources =
      senseLanguageSources.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseMiscs =
      senseMiscs.groupListsBy((element) => element['senseId'] as int);
  final groupedSensePOSs =
      sensePOSs.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseRestrictedToKanjis = senseRestrictedToKanjis
      .groupListsBy((element) => element['senseId'] as int);
  final groupedSenseRestrictedToReadings = senseRestrictedToReadings
      .groupListsBy((element) => element['senseId'] as int);
  final groupedSenseSeeAlsos =
      senseSeeAlsos.groupListsBy((element) => element['senseId'] as int);
  // NOTE(review): [exampleSentences] is accepted and bucketless — it is not
  // mapped into WordSearchSense here; confirm whether that is intentional.
  final List<WordSearchSense> result = [];
  for (final sense in senses) {
    final int senseId = sense['id'] as int;
    final antonyms = groupedSenseAntonyms[senseId] ?? [];
    final dialects = groupedSenseDialects[senseId] ?? [];
    final fields = groupedSenseFields[senseId] ?? [];
    final glossaries = groupedSenseGlossaries[senseId] ?? [];
    final infos = groupedSenseInfos[senseId] ?? [];
    final languageSources = groupedSenseLanguageSources[senseId] ?? [];
    final miscs = groupedSenseMiscs[senseId] ?? [];
    final pos = groupedSensePOSs[senseId] ?? [];
    final restrictedToKanjis = groupedSenseRestrictedToKanjis[senseId] ?? [];
    final restrictedToReadings =
        groupedSenseRestrictedToReadings[senseId] ?? [];
    final seeAlsos = groupedSenseSeeAlsos[senseId] ?? [];
    final resultSense = WordSearchSense(
      englishDefinitions: glossaries.map((e) => e['phrase'] as String).toList(),
      partsOfSpeech:
          pos.map((e) => JMdictPOS.fromId(e['pos'] as String)).toList(),
      seeAlso: seeAlsos
          .map((e) => WordSearchXrefEntry(
                entryId: e['xrefEntryId'] as int,
                ambiguous: e['ambiguous'] == 1,
              ))
          .toList(),
      antonyms: antonyms
          .map((e) => WordSearchXrefEntry(
                entryId: e['xrefEntryId'] as int,
                ambiguous: e['ambiguous'] == 1,
              ))
          .toList(),
      restrictedToReading:
          restrictedToReadings.map((e) => e['reading'] as String).toList(),
      restrictedToKanji:
          restrictedToKanjis.map((e) => e['kanji'] as String).toList(),
      fields:
          fields.map((e) => JMdictField.fromId(e['field'] as String)).toList(),
      dialects: dialects
          .map((e) => JMdictDialect.fromId(e['dialect'] as String))
          .toList(),
      misc: miscs.map((e) => JMdictMisc.fromId(e['misc'] as String)).toList(),
      info: infos.map((e) => e['info'] as String).toList(),
      languageSource:
          languageSources.map((e) => e['language'] as String).toList(),
    );
    result.add(resultSense);
  }
  return result;
}

View File

@@ -0,0 +1,312 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:jadb/util/sqlite_utils.dart';
import 'package:sqflite_common/sqflite.dart';
/// Raw ("linear") row lists for a set of JMdict entries, fetched in bulk by
/// [fetchLinearWordQueryData] before being regrouped into typed results.
///
/// Each field holds the untransformed rows of one table, scoped to the
/// requested entry/sense ids.
class LinearWordQueryData {
  // Parent tables.
  final List<Map<String, Object?>> senses;
  final List<Map<String, Object?>> readingElements;
  final List<Map<String, Object?>> kanjiElements;
  // Entry-level tags.
  final List<Map<String, Object?>> jlptTags;
  final List<Map<String, Object?>> commonEntries;
  // Sense child tables.
  final List<Map<String, Object?>> senseAntonyms;
  final List<Map<String, Object?>> senseDialects;
  final List<Map<String, Object?>> senseFields;
  final List<Map<String, Object?>> senseGlossaries;
  final List<Map<String, Object?>> senseInfos;
  final List<Map<String, Object?>> senseLanguageSources;
  final List<Map<String, Object?>> senseMiscs;
  final List<Map<String, Object?>> sensePOSs;
  final List<Map<String, Object?>> senseRestrictedToKanjis;
  final List<Map<String, Object?>> senseRestrictedToReadings;
  final List<Map<String, Object?>> senseSeeAlsos;
  final List<Map<String, Object?>> exampleSentences;
  // Reading/kanji element child tables.
  final List<Map<String, Object?>> readingElementInfos;
  final List<Map<String, Object?>> readingElementRestrictions;
  final List<Map<String, Object?>> kanjiElementInfos;

  const LinearWordQueryData({
    required this.senses,
    required this.readingElements,
    required this.kanjiElements,
    required this.jlptTags,
    required this.commonEntries,
    required this.senseAntonyms,
    required this.senseDialects,
    required this.senseFields,
    required this.senseGlossaries,
    required this.senseInfos,
    required this.senseLanguageSources,
    required this.senseMiscs,
    required this.sensePOSs,
    required this.senseRestrictedToKanjis,
    required this.senseRestrictedToReadings,
    required this.senseSeeAlsos,
    required this.exampleSentences,
    required this.readingElementInfos,
    required this.readingElementRestrictions,
    required this.kanjiElementInfos,
  });
}
/// Fetches the raw row lists for [entryIds] from every JMdict/JLPT table in
/// two concurrent waves: parent tables first, then all child tables keyed
/// off the fetched sense/reading/kanji rows.
///
/// NOTE(review): assumes [entryIds] is non-empty — an empty list makes the
/// `IN ()` clauses a SQLite syntax error; same for the derived senseIds.
/// Verify callers guard this.
Future<LinearWordQueryData> fetchLinearWordQueryData(
  DatabaseExecutor connection,
  List<int> entryIds,
) async {
  late final List<Map<String, Object?>> senses;
  final Future<List<Map<String, Object?>>> senses_query = connection.query(
    JMdictTableNames.sense,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );
  late final List<Map<String, Object?>> readingElements;
  final Future<List<Map<String, Object?>>> readingElements_query =
      connection.query(
    JMdictTableNames.readingElement,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
  late final List<Map<String, Object?>> kanjiElements;
  final Future<List<Map<String, Object?>>> kanjiElements_query =
      connection.query(
    JMdictTableNames.kanjiElement,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
  late final List<Map<String, Object?>> jlptTags;
  final Future<List<Map<String, Object?>>> jlptTags_query = connection.query(
    TanosJLPTTableNames.jlptTag,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );
  late final List<Map<String, Object?>> commonEntries;
  final Future<List<Map<String, Object?>>> commonEntries_query =
      connection.query(
    'JMdict_EntryCommon',
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );
  // First wave: parent tables and entry-level tags run concurrently.
  await Future.wait([
    senses_query.then((value) => senses = value),
    readingElements_query.then((value) => readingElements = value),
    kanjiElements_query.then((value) => kanjiElements = value),
    jlptTags_query.then((value) => jlptTags = value),
    commonEntries_query.then((value) => commonEntries = value),
  ]);
  // Sense queries
  // NOTE(review): reads 'senseId' here, while the older code read 'id' from
  // the same table — this matches the WIP id-column migration; confirm the
  // schema actually exposes 'senseId' on the sense table.
  final senseIds = senses.map((sense) => sense['senseId'] as int).toList();
  late final List<Map<String, Object?>> senseAntonyms;
  // Antonym xrefs are joined against the first base/furigana of the target
  // entry so callers can render the cross-reference directly.
  final Future<List<Map<String, Object?>>> senseAntonyms_query =
      connection.rawQuery(
    """
    SELECT
      "${JMdictTableNames.senseAntonyms}".senseId,
      "${JMdictTableNames.senseAntonyms}".ambiguous,
      "${JMdictTableNames.senseAntonyms}".xrefEntryId,
      "JMdict_BaseAndFurigana"."base",
      "JMdict_BaseAndFurigana"."furigana"
    FROM "${JMdictTableNames.senseAntonyms}"
    JOIN "JMdict_BaseAndFurigana"
      ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
    WHERE
      "senseId" IN (${List.filled(senseIds.length, '?').join(',')})
      AND "JMdict_BaseAndFurigana"."isFirst"
    ORDER BY
      "${JMdictTableNames.senseAntonyms}"."senseId",
      "${JMdictTableNames.senseAntonyms}"."xrefEntryId"
    """,
    [...senseIds],
  );
  late final List<Map<String, Object?>> senseDialects;
  final Future<List<Map<String, Object?>>> senseDialects_query =
      connection.query(
    JMdictTableNames.senseDialect,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseFields;
  final Future<List<Map<String, Object?>>> senseFields_query = connection.query(
    JMdictTableNames.senseField,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseGlossaries;
  final Future<List<Map<String, Object?>>> senseGlossaries_query =
      connection.query(
    JMdictTableNames.senseGlossary,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseInfos;
  final Future<List<Map<String, Object?>>> senseInfos_query = connection.query(
    JMdictTableNames.senseInfo,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseLanguageSources;
  final Future<List<Map<String, Object?>>> senseLanguageSources_query =
      connection.query(
    JMdictTableNames.senseLanguageSource,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseMiscs;
  final Future<List<Map<String, Object?>>> senseMiscs_query = connection.query(
    JMdictTableNames.senseMisc,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> sensePOSs;
  final Future<List<Map<String, Object?>>> sensePOSs_query = connection.query(
    JMdictTableNames.sensePOS,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseRestrictedToKanjis;
  final Future<List<Map<String, Object?>>> senseRestrictedToKanjis_query =
      connection.query(
    JMdictTableNames.senseRestrictedToKanji,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseRestrictedToReadings;
  final Future<List<Map<String, Object?>>> senseRestrictedToReadings_query =
      connection.query(
    JMdictTableNames.senseRestrictedToReading,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseSeeAlsos;
  // See-also xrefs are joined like antonyms above: first base/furigana only.
  final Future<List<Map<String, Object?>>> senseSeeAlsos_query =
      connection.rawQuery(
    """
    SELECT
      "${JMdictTableNames.senseSeeAlso}"."senseId",
      "${JMdictTableNames.senseSeeAlso}"."ambiguous",
      "${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
      "JMdict_BaseAndFurigana"."base",
      "JMdict_BaseAndFurigana"."furigana"
    FROM "${JMdictTableNames.senseSeeAlso}"
    JOIN "JMdict_BaseAndFurigana"
      ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
    WHERE
      "senseId" IN (${List.filled(senseIds.length, '?').join(',')})
      AND "JMdict_BaseAndFurigana"."isFirst"
    ORDER BY
      "${JMdictTableNames.senseSeeAlso}"."senseId",
      "${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
    """,
    [...senseIds],
  );
  late final List<Map<String, Object?>> exampleSentences;
  final Future<List<Map<String, Object?>>> exampleSentences_query =
      connection.query(
    'JMdict_ExampleSentence',
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  // Reading queries
  // NOTE(review): these (entryId, reading) pairs are Dart records rendered
  // through Record.toString() into SQL row values — relies on that string
  // shape and on escapeStringValue quoting; fragile, verify (and unlike the
  // queries above, these are not parameterized).
  final readingIds = readingElements
      .map((element) => (
            element['entryId'] as int,
            escapeStringValue(element['reading'] as String)
          ))
      .toList();
  late final List<Map<String, Object?>> readingElementInfos;
  final Future<List<Map<String, Object?>>> readingElementInfos_query =
      connection.query(
    JMdictTableNames.readingInfo,
    where: '(entryId, reading) IN (${readingIds.join(',')})',
  );
  late final List<Map<String, Object?>> readingElementRestrictions;
  final Future<List<Map<String, Object?>>> readingElementRestrictions_query =
      connection.query(
    JMdictTableNames.readingRestriction,
    where: '(entryId, reading) IN (${readingIds.join(',')})',
  );
  // Kanji queries
  // Kanji elements store their written form in the 'reading' column too.
  final kanjiIds = kanjiElements
      .map((element) => (
            element['entryId'] as int,
            escapeStringValue(element['reading'] as String)
          ))
      .toList();
  late final List<Map<String, Object?>> kanjiElementInfos;
  final Future<List<Map<String, Object?>>> kanjiElementInfos_query =
      connection.query(
    JMdictTableNames.kanjiInfo,
    where: '(entryId, reading) IN (${kanjiIds.join(',')})',
  );
  // Second wave: all child tables run concurrently.
  await Future.wait([
    senseAntonyms_query.then((value) => senseAntonyms = value),
    senseDialects_query.then((value) => senseDialects = value),
    senseFields_query.then((value) => senseFields = value),
    senseGlossaries_query.then((value) => senseGlossaries = value),
    senseInfos_query.then((value) => senseInfos = value),
    senseLanguageSources_query.then((value) => senseLanguageSources = value),
    senseMiscs_query.then((value) => senseMiscs = value),
    sensePOSs_query.then((value) => sensePOSs = value),
    senseRestrictedToKanjis_query
        .then((value) => senseRestrictedToKanjis = value),
    senseRestrictedToReadings_query
        .then((value) => senseRestrictedToReadings = value),
    senseSeeAlsos_query.then((value) => senseSeeAlsos = value),
    exampleSentences_query.then((value) => exampleSentences = value),
    readingElementInfos_query.then((value) => readingElementInfos = value),
    readingElementRestrictions_query
        .then((value) => readingElementRestrictions = value),
    kanjiElementInfos_query.then((value) => kanjiElementInfos = value),
  ]);
  return LinearWordQueryData(
    senses: senses,
    readingElements: readingElements,
    kanjiElements: kanjiElements,
    jlptTags: jlptTags,
    commonEntries: commonEntries,
    senseAntonyms: senseAntonyms,
    senseDialects: senseDialects,
    senseFields: senseFields,
    senseGlossaries: senseGlossaries,
    senseInfos: senseInfos,
    senseLanguageSources: senseLanguageSources,
    senseMiscs: senseMiscs,
    sensePOSs: sensePOSs,
    senseRestrictedToKanjis: senseRestrictedToKanjis,
    senseRestrictedToReadings: senseRestrictedToReadings,
    senseSeeAlsos: senseSeeAlsos,
    exampleSentences: exampleSentences,
    readingElementInfos: readingElementInfos,
    readingElementRestrictions: readingElementRestrictions,
    kanjiElementInfos: kanjiElementInfos,
  );
}

View File

@@ -0,0 +1,339 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/search/word_search/word_search.dart';
import 'package:jadb/util/text_filtering.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// A JMdict entry id paired with its relevance score, used to rank search
/// results before the full entry data is fetched.
class ScoredEntryId {
  /// The JMdict entry id.
  final int entryId;

  /// Relevance score; higher ranks earlier.
  final int score;

  const ScoredEntryId(this.entryId, this.score);
}
/// Classifies [word] by its script content to pick a search strategy.
///
/// Precedence: kanji (pure or mixed with ASCII) > ASCII-only (English) >
/// kana > everything else (mixed kana).
SearchMode _determineSearchMode(String word) {
  final hasKanji = kanjiRegex.hasMatch(word);
  final hasAscii = RegExp(r'[A-Za-z]').hasMatch(word);
  if (hasKanji) {
    return hasAscii ? SearchMode.MixedKanji : SearchMode.Kanji;
  }
  if (hasAscii) {
    return SearchMode.English;
  }
  final hasKana =
      word.contains(hiraganaRegex) || word.contains(katakanaRegex);
  return hasKana ? SearchMode.Kana : SearchMode.MixedKana;
}
/// FTS reacts to certain characters, so we should filter them out.
///
/// Strips the FTS query operators `.` `-` `*` `+` `(` `)` `^` `"` from
/// [word] in a single pass.
String _filterFTSSensitiveCharacters(String word) =>
    word.replaceAll(RegExp(r'[.\-*+()^"]'), '');
/// Builds the (SQL, bind-args) pair shared by the kanji- and kana-element
/// word searches.
///
/// [tableName] must be [JMdictTableNames.kanjiElement] or
/// [JMdictTableNames.readingElement]; it selects both the table queried and
/// the matching `JMdict_EntryScore.type` row ('kanji' vs 'reading').
/// With [countOnly] the outer SELECT returns a single `count` column and the
/// per-CTE LIMITs are dropped; otherwise each CTE is capped at [pageSize].
///
/// Placeholder order is load-bearing and must match the args list below:
///   1. exact-match bonus comparison (fts_results)
///   2. FTS MATCH prefix term (fts_results)
///   3. fts_results LIMIT          (only when !countOnly)
///   4. LIKE term (non_fts_results)
///   5. non_fts_results LIMIT      (only when !countOnly)
(String, List<Object?>) _kanjiReadingTemplate(
  String tableName,
  String word, {
  int pageSize = 10,
  bool countOnly = false,
}) =>
    (
      '''
      WITH
        fts_results AS (
          SELECT DISTINCT
            "${tableName}FTS"."entryId",
            100
              + (("${tableName}FTS"."reading" = ?) * 50)
              + "JMdict_EntryScore"."score"
              AS "score"
          FROM "${tableName}FTS"
          JOIN "${tableName}" USING ("entryId", "reading")
          JOIN "JMdict_EntryScore" USING ("entryId", "reading")
          WHERE "${tableName}FTS"."reading" MATCH ? || '*'
            AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'kanji' : 'reading'}'
          ORDER BY
            "JMdict_EntryScore"."score" DESC
          ${!countOnly ? 'LIMIT ?' : ''}
        ),
        non_fts_results AS (
          SELECT DISTINCT
            "${tableName}"."entryId",
            50
              + "JMdict_EntryScore"."score"
              AS "score"
          FROM "${tableName}"
          JOIN "JMdict_EntryScore" USING ("entryId", "reading")
          WHERE "reading" LIKE '%' || ? || '%'
            AND "entryId" NOT IN (SELECT "entryId" FROM "fts_results")
            AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'kanji' : 'reading'}'
          ORDER BY
            "JMdict_EntryScore"."score" DESC,
            "${tableName}"."entryId" ASC
          ${!countOnly ? 'LIMIT ?' : ''}
        )
      ${countOnly ? 'SELECT COUNT("entryId") AS count' : 'SELECT "entryId", "score"'}
      FROM (
        SELECT * FROM fts_results
        UNION ALL
        SELECT * FROM non_fts_results
      )
      '''
          .trim(),
      [
        _filterFTSSensitiveCharacters(word),
        _filterFTSSensitiveCharacters(word),
        if (!countOnly) pageSize,
        _filterFTSSensitiveCharacters(word),
        if (!countOnly) pageSize,
      ]
    );
/// Runs the kanji-element search for [word], returning scored entry ids.
///
/// NOTE(review): [offset] is accepted but never applied — pagination beyond
/// the first [pageSize] rows is not implemented; confirm against callers.
Future<List<ScoredEntryId>> _queryKanji(
  DatabaseExecutor connection,
  String word,
  int pageSize,
  int? offset,
) async {
  final (sql, args) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    pageSize: pageSize,
  );
  final rows = await connection.rawQuery(sql, args);
  return [
    for (final row in rows)
      ScoredEntryId(
        row['entryId'] as int,
        row['score'] as int,
      ),
  ];
}
/// Counts all kanji-element matches for [word] (no paging applied).
Future<int> _queryKanjiCount(
  DatabaseExecutor connection,
  String word,
) async {
  final (sql, bindings) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(sql, bindings);
  return rows.first['count'] as int;
}
/// Runs the reading-element (kana) search for [word] and maps each row to a
/// [ScoredEntryId].
///
/// NOTE(review): [offset] is accepted but never forwarded into the query,
/// so pages beyond the first are not actually offset here.
Future<List<ScoredEntryId>> _queryKana(
  DatabaseExecutor connection,
  String word,
  int pageSize,
  int? offset,
) async {
  final (sql, bindings) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    pageSize: pageSize,
  );
  final rows = await connection.rawQuery(sql, bindings);
  return [
    for (final row in rows)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
/// Counts all reading-element (kana) matches for [word] (no paging applied).
Future<int> _queryKanaCount(
  DatabaseExecutor connection,
  String word,
) async {
  final (sql, bindings) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(sql, bindings);
  return rows.first['count'] as int;
}
/// Searches English glossary phrases for [word].
///
/// Scoring per entry: its best precomputed entry score, plus bonuses for an
/// exact phrase match on the first sense (+50), second sense (+30) or any
/// sense (+20). Results are paged with LIMIT [pageSize] OFFSET [offset].
Future<List<ScoredEntryId>> _queryEnglish(
  DatabaseExecutor connection,
  String word,
  int pageSize,
  int? offset,
) async {
  final result = await connection.rawQuery(
    '''
SELECT
"${JMdictTableNames.sense}"."entryId",
MAX("JMdict_EntryScore"."score")
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?) * 20)
as "score"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
JOIN "JMdict_EntryScore" USING ("entryId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
GROUP BY "JMdict_EntryScore"."entryId"
ORDER BY
"score" DESC,
"${JMdictTableNames.sense}"."entryId" ASC
LIMIT ?
OFFSET ?
'''
        .trim(),
    [
      word,
      word,
      word,
      // '%' is stripped so user input cannot inject LIKE wildcards.
      '%${word.replaceAll('%', '')}%',
      pageSize,
      // Fix: a null offset used to be bound as SQL NULL into the OFFSET
      // clause; default to the first page instead.
      offset ?? 0,
    ],
  );
  return result
      .map((row) => ScoredEntryId(
            row['entryId'] as int,
            row['score'] as int,
          ))
      .toList();
}
/// Counts distinct entries whose English glossary phrases match [word].
///
/// Fix: the LIKE argument now strips '%' from the input, matching the
/// filtering done by `_queryEnglish`, so the reported count agrees with the
/// search results and user input cannot inject LIKE wildcards.
Future<int> _queryEnglishCount(
  DatabaseExecutor connection,
  String word,
) async {
  final result = await connection.rawQuery(
    '''
SELECT
COUNT(DISTINCT "${JMdictTableNames.sense}"."entryId") AS "count"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
'''
        .trim(),
    [
      '%${word.replaceAll('%', '')}%',
    ],
  );
  return result.first['count'] as int;
}
/// Fetches up to [pageSize] scored entry ids matching [word].
///
/// [SearchMode.Auto] is resolved to a concrete mode from the query's
/// characters before dispatching to the kanji, kana or English search.
///
/// Throws [UnimplementedError] for the mixed search modes.
///
/// Fixes: removed a stray empty statement after the switch and the
/// redundant `late final` staging variable; each case now returns directly.
Future<List<ScoredEntryId>> fetchEntryIds(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
  int pageSize,
  int? offset,
) async {
  assert(
    word.isNotEmpty,
    'Word should not be empty when fetching entry IDs',
  );
  if (searchMode == SearchMode.Auto) {
    searchMode = _determineSearchMode(word);
  }
  switch (searchMode) {
    case SearchMode.Kanji:
      return _queryKanji(connection, word, pageSize, offset);
    case SearchMode.Kana:
      return _queryKana(connection, word, pageSize, offset);
    case SearchMode.English:
      return _queryEnglish(connection, word, pageSize, offset);
    case SearchMode.MixedKana:
    case SearchMode.MixedKanji:
    default:
      throw UnimplementedError(
        'Search mode $searchMode is not implemented',
      );
  }
}
/// Counts entries matching [word] under [searchMode].
///
/// [SearchMode.Auto] is resolved to a concrete mode first. Throws
/// [UnimplementedError] for the mixed search modes.
Future<int?> fetchEntryIdCount(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
) async {
  if (searchMode == SearchMode.Auto) {
    searchMode = _determineSearchMode(word);
  }
  assert(
    word.isNotEmpty,
    'Word should not be empty when fetching entry IDs',
  );
  switch (searchMode) {
    case SearchMode.Kanji:
      return _queryKanjiCount(connection, word);
    case SearchMode.Kana:
      return _queryKanaCount(connection, word);
    case SearchMode.English:
      return _queryEnglishCount(connection, word);
    case SearchMode.MixedKana:
    case SearchMode.MixedKanji:
    default:
      throw UnimplementedError(
        'Search mode $searchMode is not implemented',
      );
  }
}

View File

@@ -0,0 +1,300 @@
import 'package:collection/collection.dart';
import 'package:jadb/models/common/jlpt_level.dart';
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
import 'package:jadb/search/word_search/entry_id_query.dart';
/// Reassembles flat per-table query rows into one [WordSearchResult] per
/// entry in [entryIds], preserving the ids' order (and thus their ranking).
///
/// Per-entry lists (reading/kanji elements, JLPT tags, senses) are filtered
/// by `entryId` here; the sense-detail and element-info lists are handed to
/// the regrouping helpers as-is.
List<WordSearchResult> regroupWordSearchResults({
  required List<ScoredEntryId> entryIds,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> jlptTags,
  required List<Map<String, Object?>> commonEntries,
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
  required List<Map<String, Object?>> kanjiElementInfos,
}) {
  final List<WordSearchResult> results = [];
  // Set membership lookup for the "common word" flag.
  final commonEntryIds =
      commonEntries.map((entry) => entry['entryId'] as int).toSet();
  for (final scoredEntryId in entryIds) {
    final List<Map<String, Object?>> entryReadingElements = readingElements
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    final List<Map<String, Object?>> entryKanjiElements = kanjiElements
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    final List<Map<String, Object?>> entryJlptTags = jlptTags
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    // Highest JLPT level wins (descending sort); none if the entry is
    // untagged.
    final jlptLevel = entryJlptTags
            .map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
            .sorted((a, b) => b.compareTo(a))
            .firstOrNull ??
        JlptLevel.none;
    final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
    final List<Map<String, Object?>> entrySenses = senses
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
      entryId: scoredEntryId.entryId,
      readingElements: entryReadingElements,
      kanjiElements: entryKanjiElements,
      readingElementInfos: readingElementInfos,
      readingElementRestrictions: readingElementRestrictions,
      kanjiElementInfos: kanjiElementInfos,
    );
    final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
      senses: entrySenses,
      senseAntonyms: senseAntonyms,
      senseDialects: senseDialects,
      senseFields: senseFields,
      senseGlossaries: senseGlossaries,
      senseInfos: senseInfos,
      senseLanguageSources: senseLanguageSources,
      senseMiscs: senseMiscs,
      sensePOSs: sensePOSs,
      senseRestrictedToKanjis: senseRestrictedToKanjis,
      senseRestrictedToReadings: senseRestrictedToReadings,
      senseSeeAlsos: senseSeeAlsos,
      exampleSentences: exampleSentences,
    );
    results.add(
      WordSearchResult(
        score: scoredEntryId.score,
        entryId: scoredEntryId.entryId,
        isCommon: isCommon,
        japanese: entryReadingElementsGrouped.rubys,
        kanjiInfo: entryReadingElementsGrouped.kanjiInfos,
        readingInfo: entryReadingElementsGrouped.readingInfos,
        senses: entrySensesGrouped,
        jlptLevel: jlptLevel,
        // Only JMdict is searched here.
        sources: const WordSearchSources(
          jmdict: true,
          jmnedict: false,
        ),
      ),
    );
  }
  return results;
}
/// Per-entry result of regrouping reading/kanji element rows.
class GroupedWordResult {
  // Base/furigana pairs for rendering the word.
  final List<WordSearchRuby> rubys;
  // Reading-element info tags, keyed by reading text.
  final Map<String, JMdictReadingInfo> readingInfos;
  // Kanji-element info tags, keyed by kanji text.
  final Map<String, JMdictKanjiInfo> kanjiInfos;
  const GroupedWordResult({
    required this.rubys,
    required this.readingInfos,
    required this.kanjiInfos,
  });
}
/// Combines an entry's kanji and reading elements into ruby (base +
/// furigana) pairs, honouring reading restrictions, and collects the
/// element info tags.
///
/// A reading that does not match any kanji (`doesNotMatchKanji == 1`), or
/// any reading of a kanji-less entry, becomes a furigana-less ruby.
/// Otherwise each (kanji, reading) combination allowed by the reading's
/// restriction rows produces one ruby.
GroupedWordResult _regroup_words({
  required int entryId,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> kanjiElementInfos,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
}) {
  final List<WordSearchRuby> rubys = [];
  final kanjiElements_ =
      kanjiElements.where((element) => element['entryId'] == entryId).toList();
  final readingElements_ = readingElements
      .where((element) => element['entryId'] == entryId)
      .toList();
  final readingElementRestrictions_ = readingElementRestrictions
      .where((element) => element['entryId'] == entryId)
      .toList();
  for (final readingElement in readingElements_) {
    if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
      // Kana-only rendering: no kanji base to attach furigana to.
      final ruby = WordSearchRuby(
        base: readingElement['reading'] as String,
      );
      rubys.add(ruby);
      continue;
    }
    for (final kanjiElement in kanjiElements_) {
      final kanji = kanjiElement['reading'] as String;
      final reading = readingElement['reading'] as String;
      final restrictions = readingElementRestrictions_
          .where((element) => element['reading'] == reading)
          .toList();
      // If the reading is restricted to specific kanji, skip kanji outside
      // that set.
      if (restrictions.isNotEmpty &&
          !restrictions.any((element) => element['restriction'] == kanji)) {
        continue;
      }
      final ruby = WordSearchRuby(
        base: kanji,
        furigana: reading,
      );
      rubys.add(ruby);
    }
  }
  assert(
    rubys.isNotEmpty,
    'No readings found for entryId: $entryId',
  );
  // NOTE(review): unlike the element lists above, the info maps below are
  // built from the full, unfiltered info lists (the caller passes the
  // whole query result) — infos from other entries sharing the same
  // reading/kanji text could leak in, and duplicate keys keep the last row.
  // Verify whether filtering by entryId is needed here.
  return GroupedWordResult(
    rubys: rubys,
    readingInfos: {
      for (final rei in readingElementInfos)
        rei['reading'] as String:
            JMdictReadingInfo.fromId(rei['info'] as String),
    },
    kanjiInfos: {
      for (final kei in kanjiElementInfos)
        kei['reading'] as String: JMdictKanjiInfo.fromId(kei['info'] as String),
    },
  );
}
/// Regroups flat per-table sense rows into one [WordSearchSense] per row of
/// [senses].
///
/// Each auxiliary list is grouped by its `senseId` once up front, then every
/// sense is assembled from its grouped pieces.
///
/// NOTE(review): [exampleSentences] is accepted but never used in this
/// function.
List<WordSearchSense> _regroup_senses({
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
}) {
  final groupedSenseAntonyms =
      senseAntonyms.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseDialects =
      senseDialects.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseFields =
      senseFields.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseGlossaries =
      senseGlossaries.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseInfos =
      senseInfos.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseLanguageSources =
      senseLanguageSources.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseMiscs =
      senseMiscs.groupListsBy((element) => element['senseId'] as int);
  final groupedSensePOSs =
      sensePOSs.groupListsBy((element) => element['senseId'] as int);
  final groupedSenseRestrictedToKanjis = senseRestrictedToKanjis
      .groupListsBy((element) => element['senseId'] as int);
  final groupedSenseRestrictedToReadings = senseRestrictedToReadings
      .groupListsBy((element) => element['senseId'] as int);
  final groupedSenseSeeAlsos =
      senseSeeAlsos.groupListsBy((element) => element['senseId'] as int);
  final List<WordSearchSense> result = [];
  for (final sense in senses) {
    final int senseId = sense['senseId'] as int;
    final antonyms = groupedSenseAntonyms[senseId] ?? [];
    final dialects = groupedSenseDialects[senseId] ?? [];
    final fields = groupedSenseFields[senseId] ?? [];
    final glossaries = groupedSenseGlossaries[senseId] ?? [];
    final infos = groupedSenseInfos[senseId] ?? [];
    final languageSources = groupedSenseLanguageSources[senseId] ?? [];
    final miscs = groupedSenseMiscs[senseId] ?? [];
    final pos = groupedSensePOSs[senseId] ?? [];
    final restrictedToKanjis = groupedSenseRestrictedToKanjis[senseId] ?? [];
    final restrictedToReadings =
        groupedSenseRestrictedToReadings[senseId] ?? [];
    final seeAlsos = groupedSenseSeeAlsos[senseId] ?? [];
    final resultSense = WordSearchSense(
      englishDefinitions: glossaries.map((e) => e['phrase'] as String).toList(),
      partsOfSpeech:
          pos.map((e) => JMdictPOS.fromId(e['pos'] as String)).toList(),
      seeAlso: seeAlsos
          .map((e) => WordSearchXrefEntry(
                entryId: e['xrefEntryId'] as int,
                baseWord: e['base'] as String,
                furigana: e['furigana'] as String?,
                ambiguous: e['ambiguous'] == 1,
              ))
          .toList(),
      antonyms: antonyms
          .map((e) => WordSearchXrefEntry(
                entryId: e['xrefEntryId'] as int,
                baseWord: e['base'] as String,
                furigana: e['furigana'] as String?,
                ambiguous: e['ambiguous'] == 1,
              ))
          .toList(),
      restrictedToReading:
          restrictedToReadings.map((e) => e['reading'] as String).toList(),
      restrictedToKanji:
          restrictedToKanjis.map((e) => e['kanji'] as String).toList(),
      fields:
          fields.map((e) => JMdictField.fromId(e['field'] as String)).toList(),
      dialects: dialects
          .map((e) => JMdictDialect.fromId(e['dialect'] as String))
          .toList(),
      misc: miscs.map((e) => JMdictMisc.fromId(e['misc'] as String)).toList(),
      info: infos.map((e) => e['info'] as String).toList(),
      languageSource: languageSources
          .map((e) => WordSearchSenseLanguageSource(
                language: e['language'] as String,
                phrase: e['phrase'] as String?,
                fullyDescribesSense: e['fullyDescribesSense'] == 1,
                constructedFromSmallerWords:
                    e['constructedFromSmallerWords'] == 1,
              ))
          .toList(),
    );
    result.add(resultSense);
  }
  return result;
}

View File

@@ -0,0 +1,153 @@
// TODO: Support globs
// TODO: Support tags
// TODO: Prefer original kana type when sorting results
// TODO: Support mixing kana and romaji
//
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/search/word_search/data_query.dart';
import 'package:jadb/search/word_search/entry_id_query.dart';
import 'package:jadb/search/word_search/regrouping.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// How a word query string should be interpreted when searching.
enum SearchMode {
  /// Derive one of the concrete modes from the query's characters.
  Auto,

  /// Match against English glossary phrases.
  English,

  /// Match against kanji elements.
  Kanji,

  /// Kanji mixed with other scripts — not implemented yet
  /// (`fetchEntryIds` throws [UnimplementedError]).
  MixedKanji,

  /// Match against kana reading elements.
  Kana,

  /// Kana mixed with other scripts — not implemented yet
  /// (`fetchEntryIds` throws [UnimplementedError]).
  MixedKana,
}
/// Searches JMdict for [word] over an open database connection.
///
/// Returns `null` when [word] is empty. Otherwise fetches up to [pageSize]
/// scored entry ids for page [page] (offset = page * pageSize), loads the
/// flat per-table rows for those entries, and regroups them into
/// [WordSearchResult]s. An empty list means no matches.
Future<List<WordSearchResult>?> searchWordWithDbConnection(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
  int page,
  int pageSize,
) async {
  if (word.isEmpty) {
    return null;
  }
  final offset = page * pageSize;
  final List<ScoredEntryId> entryIds = await fetchEntryIds(
    connection,
    word,
    searchMode,
    pageSize,
    offset,
  );
  if (entryIds.isEmpty) {
    return [];
  }
  // One bulk fetch of all per-table rows for the matched entries.
  final LinearWordQueryData linearWordQueryData =
      await fetchLinearWordQueryData(
    connection,
    entryIds.map((e) => e.entryId).toList(),
  );
  final result = regroupWordSearchResults(
    entryIds: entryIds,
    readingElements: linearWordQueryData.readingElements,
    kanjiElements: linearWordQueryData.kanjiElements,
    jlptTags: linearWordQueryData.jlptTags,
    commonEntries: linearWordQueryData.commonEntries,
    senses: linearWordQueryData.senses,
    senseAntonyms: linearWordQueryData.senseAntonyms,
    senseDialects: linearWordQueryData.senseDialects,
    senseFields: linearWordQueryData.senseFields,
    senseGlossaries: linearWordQueryData.senseGlossaries,
    senseInfos: linearWordQueryData.senseInfos,
    senseLanguageSources: linearWordQueryData.senseLanguageSources,
    senseMiscs: linearWordQueryData.senseMiscs,
    sensePOSs: linearWordQueryData.sensePOSs,
    senseRestrictedToKanjis: linearWordQueryData.senseRestrictedToKanjis,
    senseRestrictedToReadings: linearWordQueryData.senseRestrictedToReadings,
    senseSeeAlsos: linearWordQueryData.senseSeeAlsos,
    exampleSentences: linearWordQueryData.exampleSentences,
    readingElementInfos: linearWordQueryData.readingElementInfos,
    readingElementRestrictions: linearWordQueryData.readingElementRestrictions,
    kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
  );
  return result;
}
/// Returns the total number of search results for [word] under
/// [searchMode], or `null` when the query string is empty.
Future<int?> searchWordCountWithDbConnection(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
) async {
  if (word.isEmpty) {
    return null;
  }
  return fetchEntryIdCount(connection, word, searchMode);
}
/// Looks up a single JMdict entry by its [id].
///
/// Returns `null` for non-positive ids or when no entry with that id
/// exists. The returned result is regrouped the same way as a search hit,
/// with a placeholder score of 0.
Future<WordSearchResult?> getWordByIdWithDbConnection(
  DatabaseExecutor connection,
  int id,
) async {
  if (id <= 0) {
    return null;
  }
  // Cheap existence probe before loading all linear per-table data.
  final exists = await connection.rawQuery(
    'SELECT EXISTS(SELECT 1 FROM "${JMdictTableNames.entry}" WHERE "entryId" = ?)',
    [id],
  ).then((value) => value.isNotEmpty && value.first.values.first == 1);
  if (!exists) {
    return null;
  }
  final LinearWordQueryData linearWordQueryData =
      await fetchLinearWordQueryData(
    connection,
    [id],
  );
  final result = regroupWordSearchResults(
    // Score 0: there is no search ranking for a direct id lookup.
    entryIds: [ScoredEntryId(id, 0)],
    readingElements: linearWordQueryData.readingElements,
    kanjiElements: linearWordQueryData.kanjiElements,
    jlptTags: linearWordQueryData.jlptTags,
    commonEntries: linearWordQueryData.commonEntries,
    senses: linearWordQueryData.senses,
    senseAntonyms: linearWordQueryData.senseAntonyms,
    senseDialects: linearWordQueryData.senseDialects,
    senseFields: linearWordQueryData.senseFields,
    senseGlossaries: linearWordQueryData.senseGlossaries,
    senseInfos: linearWordQueryData.senseInfos,
    senseLanguageSources: linearWordQueryData.senseLanguageSources,
    senseMiscs: linearWordQueryData.senseMiscs,
    sensePOSs: linearWordQueryData.sensePOSs,
    senseRestrictedToKanjis: linearWordQueryData.senseRestrictedToKanjis,
    senseRestrictedToReadings: linearWordQueryData.senseRestrictedToReadings,
    senseSeeAlsos: linearWordQueryData.senseSeeAlsos,
    exampleSentences: linearWordQueryData.exampleSentences,
    readingElementInfos: linearWordQueryData.readingElementInfos,
    readingElementRestrictions: linearWordQueryData.readingElementRestrictions,
    kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
  );
  assert(
    result.length == 1,
    'Expected exactly one result for entryId $id, but got ${result.length}',
  );
  return result.firstOrNull;
}

View File

@@ -1,7 +1,5 @@
abstract class JMdictTableNames {
static const String entry = 'JMdict_Entry';
static const String entryByKana = 'JMdict_EntryByKana';
static const String entryByEnglish = 'JMdict_EntryByEnglish';
static const String kanjiElement = 'JMdict_KanjiElement';
static const String kanjiInfo = 'JMdict_KanjiElementInfo';
static const String readingElement = 'JMdict_ReadingElement';
@@ -23,8 +21,6 @@ abstract class JMdictTableNames {
static Set<String> get allTables => {
entry,
entryByKana,
entryByEnglish,
kanjiElement,
kanjiInfo,
readingElement,

View File

@@ -289,15 +289,16 @@ extension on DateTime {
}
String get japaneseWeekdayPrefix => [
'',
'',
'',
'',
'',
'',
'',
][weekday - 1];
'',
'',
'',
'',
'',
'',
'',
][weekday - 1];
/// Returns the date in Japanese format.
String japaneseDate({bool showWeekday = false}) => '$month月$day日' + (showWeekday ? '$japaneseWeekdayPrefix' : '');
String japaneseDate({bool showWeekday = false}) =>
'$month月$day日' + (showWeekday ? '$japaneseWeekdayPrefix' : '');
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,247 @@
import 'package:jadb/util/lemmatizer/rules.dart';
/// Grammatical word classes the lemmatizer distinguishes.
enum WordClass {
  noun,
  ichidanVerb,
  godanVerb,
  irregularVerb,
  iAdjective,
  nAdjective,
  adverb,
  particle,

  /// Synthetic class assigned to the raw input word by [lemmatize].
  input,
}
/// Whether a rule's patterns attach at the start (prefix) or the end
/// (suffix) of a word.
enum LemmatizationRuleType {
  prefix,
  suffix,
}
/// A single (de)conjugation rule used by the lemmatizer.
///
/// A rule matches an affix described by [pattern] and rewrites it towards a
/// dictionary form. [wordClass] is the class this rule recognises, and
/// [validChildClasses] restricts which rules may be applied to its output.
class LemmatizationRule {
  /// Human-readable rule name (shown in [Lemmatized.toString]).
  final String name;

  /// The affix pattern this rule matches and rewrites.
  final AllomorphPattern pattern;

  /// Word class of the form this rule recognises.
  final WordClass wordClass;

  /// Word classes that follow-up rules may belong to; `null` allows any.
  final List<WordClass>? validChildClasses;

  /// Terminal rules stop further recursive lemmatization.
  final bool terminal;

  const LemmatizationRule({
    required this.name,
    required this.pattern,
    required this.wordClass,
    this.validChildClasses,
    this.terminal = false,
  });

  /// Whether [pattern] matches [word].
  bool matches(String word) => pattern.matches(word);

  /// Applies [pattern] to [word]; `null` when it does not match.
  List<String>? apply(String word) => pattern.apply(word);

  /// Convenience constructor for a rule with a single pattern and (optional)
  /// single replacement.
  ///
  /// Fix: [validChildClasses], [terminal] and [lookAheadBehind] previously
  /// had no type annotations and were implicitly `dynamic`; they are now
  /// explicitly typed (same defaults, so all existing call sites keep
  /// working).
  LemmatizationRule.simple({
    required String name,
    required String pattern,
    required String? replacement,
    required WordClass wordClass,
    List<WordClass>? validChildClasses,
    bool terminal = false,
    List<Pattern> lookAheadBehind = const [''],
    LemmatizationRuleType type = LemmatizationRuleType.suffix,
  }) : this(
          name: name,
          pattern: AllomorphPattern(
            patterns: {
              pattern: replacement != null ? [replacement] : null
            },
            type: type,
            lookAheadBehind: lookAheadBehind,
          ),
          validChildClasses: validChildClasses,
          terminal: terminal,
          wordClass: wordClass,
        );
}
/// Represents a set of patterns for matching allomorphs in a word.
/// The patterns can be either a prefix or a suffix, and they can include
/// replacement characters for deconjugating into base forms.
class AllomorphPattern {
  // Contexts (literal strings or regexes) that must appear next to the
  // affix: before a suffix, after a prefix. The default single empty string
  // means "no context required".
  final List<Pattern> lookAheadBehind;
  // Maps each affix to its replacement candidates; a null value means the
  // affix is simply removed.
  final Map<String, List<String>?> patterns;
  // Whether the affixes attach at the start or end of a word.
  final LemmatizationRuleType type;
  const AllomorphPattern({
    required this.patterns,
    required this.type,
    this.lookAheadBehind = const [''],
  });
  /// Every (affix, compiled pattern) pair from crossing [patterns] keys with
  /// [lookAheadBehind] contexts.
  ///
  /// Regex contexts produce anchored regexes whose group 1/2 captures the
  /// affix (group 1 for prefixes, group 2 for suffixes); string contexts
  /// produce plain concatenated literals.
  List<(String, Pattern)> get allPatternCombinations {
    final combinations = <(String, Pattern)>[];
    for (final l in lookAheadBehind) {
      for (final p in patterns.keys) {
        switch ((type, l is RegExp)) {
          case (LemmatizationRuleType.prefix, true):
            combinations.add((p, RegExp('^($p)(${(l as RegExp).pattern})')));
            break;
          case (LemmatizationRuleType.prefix, false):
            combinations.add((p, '$p$l'));
            break;
          case (LemmatizationRuleType.suffix, true):
            combinations.add((p, RegExp('(${(l as RegExp).pattern})($p)\$')));
            break;
          case (LemmatizationRuleType.suffix, false):
            combinations.add((p, '$l$p'));
            break;
        }
      }
    }
    return combinations;
  }
  /// Whether any pattern combination matches [word].
  bool matches(String word) {
    for (final (_, p) in allPatternCombinations) {
      if (p is String) {
        if (type == LemmatizationRuleType.prefix
            ? word.startsWith(p)
            : word.endsWith(p)) {
          return true;
        }
      } else if (p is RegExp) {
        if (p.hasMatch(word)) {
          return true;
        }
      }
    }
    return false;
  }
  /// Rewrites [word] using the first matching pattern combination: the
  /// matched affix is swapped for each of its replacement candidates (or
  /// just removed when the candidate list is null). Returns `null` when
  /// nothing matches.
  List<String>? apply(String word) {
    for (final (affix, p) in allPatternCombinations) {
      switch ((type, p is RegExp)) {
        case (LemmatizationRuleType.prefix, true):
          final match = (p as RegExp).firstMatch(word);
          if (match != null) {
            // Group 1 is the affix itself (see allPatternCombinations).
            final prefix = match.group(1)!;
            assert(prefix == affix);
            final suffix = word.substring(prefix.length);
            return patterns[prefix] != null
                ? patterns[prefix]!.map((s) => s + suffix).toList()
                : [suffix];
          }
          break;
        case (LemmatizationRuleType.prefix, false):
          if (word.startsWith(p as String)) {
            return patterns[affix] != null
                ? patterns[affix]!
                    .map((s) => s + word.substring(affix.length))
                    .toList()
                : [word.substring(affix.length)];
          }
          break;
        case (LemmatizationRuleType.suffix, true):
          final match = (p as RegExp).firstMatch(word);
          if (match != null) {
            // Group 2 is the affix itself (see allPatternCombinations).
            final suffix = match.group(2)!;
            assert(suffix == affix);
            final prefix = word.substring(0, word.length - suffix.length);
            return patterns[suffix] != null
                ? patterns[suffix]!.map((s) => prefix + s).toList()
                : [prefix];
          }
          break;
        case (LemmatizationRuleType.suffix, false):
          if (word.endsWith(p as String)) {
            final prefix = word.substring(0, word.length - affix.length);
            return patterns[affix] != null
                ? patterns[affix]!.map((s) => prefix + s).toList()
                : [prefix];
          }
          break;
      }
    }
    return null;
  }
}
/// One node in a lemmatization tree: [rule] applied to [original], choosing
/// replacement candidate [variant], with deeper derivations in [children].
class Lemmatized {
  final String original;
  final LemmatizationRule rule;
  final int variant;
  final List<Lemmatized> children;

  const Lemmatized({
    required this.original,
    required this.rule,
    this.variant = 0,
    this.children = const [],
  });

  /// The [variant]-th rewrite of [original] by [rule], or `null` when the
  /// rule yields no candidates.
  String? get applied {
    final candidates = rule.apply(original);
    if (candidates == null || candidates.isEmpty) {
      return null;
    }
    return candidates[variant];
  }

  /// Renders the node and its children as an indented bullet tree.
  @override
  String toString() {
    final head = '$original (${rule.name}) -> ${applied ?? '<null>'}';
    if (children.isEmpty) {
      return head;
    }
    final renderedChildren = children
        .map((child) => ' - ' + child.toString().split('\n').join('\n '))
        .join('\n');
    return '$head\n$renderedChildren';
  }
}
/// Recursively applies every rule compatible with [parentRule] to [word],
/// producing one [Lemmatized] node per (rule, replacement-variant) match.
///
/// Terminal parent rules stop the recursion; a parent's
/// [LemmatizationRule.validChildClasses] (when non-null) restricts which of
/// the global `lemmatizationRules` are tried next.
List<Lemmatized> _lemmatize(LemmatizationRule parentRule, String word) {
  final children = <Lemmatized>[];
  if (parentRule.terminal) {
    return children;
  }
  final filteredLemmatizationRules = parentRule.validChildClasses == null
      ? lemmatizationRules
      : lemmatizationRules.where(
          (r) => parentRule.validChildClasses!.contains(r.wordClass),
        );
  for (final rule in filteredLemmatizationRules) {
    if (rule.matches(word)) {
      final applied = rule.apply(word);
      // One child per replacement candidate; the index doubles as the
      // variant selector for Lemmatized.applied.
      for (final (i, a) in (applied ?? []).indexed) {
        final subChildren = _lemmatize(rule, a);
        children.add(
          Lemmatized(
            original: word,
            rule: rule,
            variant: i,
            children: subChildren,
          ),
        );
      }
    }
  }
  return children;
}
/// Lemmatizes [word]: wraps it in a synthetic 'Input' rule node and
/// recursively derives all candidate deconjugations beneath it.
Lemmatized lemmatize(String word) {
  final rootRule = LemmatizationRule.simple(
    name: 'Input',
    pattern: '',
    replacement: null,
    wordClass: WordClass.input,
  );
  return Lemmatized(
    original: word,
    rule: rootRule,
    children: _lemmatize(rootRule, word),
  );
}

View File

@@ -0,0 +1,10 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
import 'package:jadb/util/lemmatizer/rules/godan-verbs.dart';
import 'package:jadb/util/lemmatizer/rules/i-adjectives.dart';
import 'package:jadb/util/lemmatizer/rules/ichidan-verbs.dart';
/// All deconjugation rules the lemmatizer tries, in order:
/// ichidan verbs, then godan verbs, then i-adjectives.
List<LemmatizationRule> lemmatizationRules = [
  ...ichidanVerbLemmatizationRules,
  ...godanVerbLemmatizationRules,
  ...iAdjectiveLemmatizationRules,
];

View File

@@ -0,0 +1,457 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
/// Deconjugation rules for godan ("u-ending") verbs.
///
/// Each [AllomorphPattern] maps a conjugated suffix to the candidate
/// dictionary-form endings that could have produced it; ambiguous suffixes
/// (e.g. the ones from the t/r/u euphonic changes) list several candidates.
///
/// NOTE(review): many single-character kana literals below appear as empty
/// strings in this view — they look stripped/garbled during extraction
/// (e.g. the base-form table should map each godan ending to itself, and
/// several map literals now have duplicate '' keys). Verify every table
/// against the original file before relying on it.
List<LemmatizationRule> godanVerbLemmatizationRules = [
  LemmatizationRule(
    name: 'Godan verb - base form',
    terminal: true,
    pattern: AllomorphPattern(
      patterns: {
        '': [''],
        '': [''],
        '': [''],
        '': [''],
        '': [''],
        '': [''],
        '': [''],
        '': [''],
        '': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative form',
    pattern: AllomorphPattern(
      patterns: {
        'わない': [''],
        'かない': [''],
        'がない': [''],
        'さない': [''],
        'たない': [''],
        'なない': [''],
        'ばない': [''],
        'まない': [''],
        'らない': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - past form',
    pattern: AllomorphPattern(
      patterns: {
        'した': [''],
        'った': ['', '', ''],
        'んだ': ['', '', ''],
        'いだ': [''],
        'いた': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - te-form',
    pattern: AllomorphPattern(
      patterns: {
        'いて': ['', ''],
        'して': [''],
        'って': ['', '', ''],
        'んで': ['', '', ''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - te-form with いる',
    pattern: AllomorphPattern(
      patterns: {
        'いている': ['', ''],
        'している': [''],
        'っている': ['', '', ''],
        'んでいる': ['', '', ''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - te-form with いた',
    pattern: AllomorphPattern(
      patterns: {
        'いていた': ['', ''],
        'していた': [''],
        'っていた': ['', '', ''],
        'んでいた': ['', '', ''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - conditional form',
    pattern: AllomorphPattern(
      patterns: {
        'けば': [''],
        'げば': [''],
        'せば': [''],
        'てば': ['', '', ''],
        'ねば': [''],
        'べば': [''],
        'めば': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - volitional form',
    pattern: AllomorphPattern(
      patterns: {
        'おう': [''],
        'こう': [''],
        'ごう': [''],
        'そう': [''],
        'とう': ['', '', ''],
        'のう': [''],
        'ぼう': [''],
        'もう': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - potential form',
    pattern: AllomorphPattern(
      patterns: {
        'ける': [''],
        'げる': [''],
        'せる': [''],
        'てる': ['', '', ''],
        'ねる': [''],
        'べる': [''],
        'める': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - passive form',
    pattern: AllomorphPattern(
      patterns: {
        'かれる': [''],
        'がれる': [''],
        'される': [''],
        'たれる': ['', '', ''],
        'なれる': [''],
        'ばれる': [''],
        'まれる': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - causative form',
    pattern: AllomorphPattern(
      patterns: {
        'かせる': [''],
        'がせる': [''],
        'させる': [''],
        'たせる': ['', '', ''],
        'なせる': [''],
        'ばせる': [''],
        'ませる': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - causative-passive form',
    pattern: AllomorphPattern(
      patterns: {
        'かされる': [''],
        'がされる': [''],
        'される': [''],
        'たされる': ['', '', ''],
        'なされる': [''],
        'ばされる': [''],
        'まされる': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - imperative form',
    pattern: AllomorphPattern(
      patterns: {
        '': [''],
        '': [''],
        '': [''],
        '': [''],
        '': ['', '', ''],
        '': [''],
        '': [''],
        '': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative past form',
    pattern: AllomorphPattern(
      patterns: {
        'わなかった': [''],
        'かなかった': [''],
        'がなかった': [''],
        'さなかった': [''],
        'たなかった': [''],
        'ななかった': [''],
        'ばなかった': [''],
        'まなかった': [''],
        'らなかった': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative te-form',
    pattern: AllomorphPattern(
      patterns: {
        'わなくて': [''],
        'かなくて': [''],
        'がなくて': [''],
        'さなくて': [''],
        'たなくて': [''],
        'ななくて': [''],
        'ばなくて': [''],
        'まなくて': [''],
        'らなくて': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative conditional form',
    pattern: AllomorphPattern(
      patterns: {
        'わなければ': [''],
        'かなければ': [''],
        'がなければ': [''],
        'さなければ': [''],
        'たなければ': [''],
        'ななければ': [''],
        'ばなければ': [''],
        'まなければ': [''],
        'らなければ': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative volitional form',
    pattern: AllomorphPattern(
      patterns: {
        'うまい': [''],
        'くまい': [''],
        'ぐまい': [''],
        'すまい': [''],
        'つまい': ['', '', ''],
        'ぬまい': [''],
        'ぶまい': [''],
        'むまい': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative potential form',
    pattern: AllomorphPattern(
      patterns: {
        'けない': [''],
        'げない': [''],
        'せない': [''],
        'てない': ['', '', ''],
        'ねない': [''],
        'べない': [''],
        'めない': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative passive form',
    pattern: AllomorphPattern(
      patterns: {
        'かれない': [''],
        'がれない': [''],
        'されない': [''],
        'たれない': ['', '', ''],
        'なれない': [''],
        'ばれない': [''],
        'まれない': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative causative form',
    pattern: AllomorphPattern(
      patterns: {
        'かせない': [''],
        'がせない': [''],
        'させない': [''],
        'たせない': ['', '', ''],
        'なせない': [''],
        'ばせない': [''],
        'ませない': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative causative-passive form',
    pattern: AllomorphPattern(
      patterns: {
        'かされない': [''],
        'がされない': [''],
        'されない': [''],
        'たされない': ['', '', ''],
        'なされない': [''],
        'ばされない': [''],
        'まされない': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative imperative form',
    pattern: AllomorphPattern(
      patterns: {
        'うな': [''],
        'くな': [''],
        'ぐな': [''],
        'すな': [''],
        'つな': [''],
        'ぬな': [''],
        'ぶな': [''],
        'むな': [''],
        'るな': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - desire form',
    pattern: AllomorphPattern(
      patterns: {
        'きたい': [''],
        'ぎたい': [''],
        'したい': [''],
        'ちたい': [''],
        'にたい': [''],
        'びたい': [''],
        'みたい': [''],
        'りたい': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative desire form',
    pattern: AllomorphPattern(
      patterns: {
        'いたくない': [''],
        'きたくない': [''],
        'ぎたくない': [''],
        'したくない': [''],
        'ちたくない': [''],
        'にたくない': [''],
        'びたくない': [''],
        'みたくない': [''],
        'りたくない': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - past desire form',
    pattern: AllomorphPattern(
      patterns: {
        'きたかった': [''],
        'ぎたかった': [''],
        'したかった': [''],
        'ちたかった': [''],
        'にたかった': [''],
        'びたかった': [''],
        'みたかった': [''],
        'りたかった': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
  LemmatizationRule(
    name: 'Godan verb - negative past desire form',
    pattern: AllomorphPattern(
      patterns: {
        'いたくなかった': [''],
        'きたくなかった': [''],
        'ぎたくなかった': [''],
        'したくなかった': [''],
        'ちたくなかった': [''],
        'にたくなかった': [''],
        'びたくなかった': [''],
        'みたくなかった': [''],
        'りたくなかった': [''],
      },
      type: LemmatizationRuleType.suffix,
    ),
    validChildClasses: [WordClass.godanVerb],
    wordClass: WordClass.godanVerb,
  ),
];

View File

@@ -0,0 +1,61 @@
// Lemmatization rules for Japanese i-adjectives.
//
// Each rule strips one inflectional suffix (`pattern`) and substitutes
// `replacement`, stepping the candidate back towards its dictionary form.
// A `terminal` rule marks a form at which lemmatization may stop.
//
// NOTE(review): several `pattern` values below are empty strings.  For
// 'base form' (terminal) that is plausibly intentional, but the
// 'continuative form' suffix is normally a single kana, so its empty
// pattern looks like a character lost in transit — TODO confirm against
// version control.
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
List<LemmatizationRule> iAdjectiveLemmatizationRules = [
// Dictionary form; no suffix to strip.
LemmatizationRule.simple(
name: 'I adjective - base form',
terminal: true,
pattern: '',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - negative form',
pattern: 'くない',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - past form',
pattern: 'かった',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - negative past form',
pattern: 'くなかった',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - te-form',
pattern: 'くて',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - conditional form',
pattern: 'ければ',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - volitional form',
pattern: 'かろう',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
// NOTE(review): empty pattern — the continuative suffix is expected to
// be a single kana; verify this entry was not corrupted.
LemmatizationRule.simple(
name: 'I adjective - continuative form',
pattern: '',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
];

View File

@@ -0,0 +1,241 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
import 'package:jadb/util/text_filtering.dart';
// Patterns that may legally appear immediately before a stripped ichidan
// suffix (shared by every rule in `ichidanVerbLemmatizationRules` via
// `lookAheadBehind`).  The first entry accepts any kanji stem.
//
// NOTE(review): every entry after `kanjiRegex` is an empty string here.
// These positions would normally hold the single kana characters that
// can end an ichidan stem; the characters look like they were lost in
// transit — TODO confirm against version control before relying on this.
List<Pattern> lookBehinds = [
kanjiRegex,
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
];
// Lemmatization rules for ichidan ("ru") verbs.
//
// Each rule strips one inflectional suffix (`pattern`) and substitutes
// `replacement`; `lookBehinds` constrains what may precede the suffix.
// A `terminal` rule marks a form at which lemmatization may stop.
//
// NOTE(review): the 'past form', 'te-form' and 'imperative form' rules
// below have empty `pattern` strings.  Those suffixes are normally a
// single kana, so the empty strings look like characters lost in
// transit — TODO confirm against version control.
List<LemmatizationRule> ichidanVerbLemmatizationRules = [
// Dictionary form; no suffix to strip.
LemmatizationRule.simple(
name: 'Ichidan verb - base form',
terminal: true,
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative form',
pattern: 'ない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
// NOTE(review): empty pattern — expected a single kana suffix; verify.
LemmatizationRule.simple(
name: 'Ichidan verb - past form',
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
// NOTE(review): empty pattern — expected a single kana suffix; verify.
LemmatizationRule.simple(
name: 'Ichidan verb - te-form',
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - te-form with いる',
pattern: 'ている',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - te-form with いた',
pattern: 'ていた',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - conditional form',
pattern: 'れば',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - volitional form',
pattern: 'よう',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
// Potential and passive are homographic for ichidan verbs (られる),
// hence two rules with identical patterns but distinct names.
LemmatizationRule.simple(
name: 'Ichidan verb - potential form',
pattern: 'られる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - passive form',
pattern: 'られる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - causative form',
pattern: 'させる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - causative passive form',
pattern: 'させられる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
// NOTE(review): empty pattern — expected a single kana suffix; verify.
LemmatizationRule.simple(
name: 'Ichidan verb - imperative form',
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative past form',
pattern: 'なかった',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative te-form',
pattern: 'なくて',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative conditional form',
pattern: 'なければ',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative volitional form',
pattern: 'なかろう',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative potential form',
pattern: 'られない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative passive form',
pattern: 'られない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative causative form',
pattern: 'させない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative causative passive form',
pattern: 'させられない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative imperative form',
pattern: 'るな',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - desire form',
pattern: 'たい',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative desire form',
pattern: 'たくない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - past desire form',
pattern: 'たかった',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative past desire form',
pattern: 'たくなかった',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
];

View File

@@ -4,7 +4,7 @@
/// See https://www.regular-expressions.info/unicode.html
///
/// Remember to turn on the unicode flag when making a new RegExp.
const String rawKanjiRegex = r'\p{Script=Hani}';
const String rawCJKRegex = r'\p{Script=Hani}';
/// The string version of a regex that will match any katakana.
/// This includes the ranges (), ()
@@ -22,7 +22,24 @@ const String rawKatakanaRegex = r'\p{Script=Katakana}';
/// Remember to turn on the unicode flag when making a new RegExp.
const String rawHiraganaRegex = r'\p{Script=Hiragana}';
/// The string version of a regex that will match any kanji.
/// This includes the ranges (), ()
///
/// See https://www.regular-expressions.info/unicode.html
///
/// Remember to turn on the unicode flag when making a new RegExp.
const String rawKanjiRegex = r'[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]';
final RegExp kanjiRegex = RegExp(rawKanjiRegex, unicode: true);
final RegExp cjkRegex = RegExp(rawCJKRegex, unicode: true);
final RegExp katakanaRegex = RegExp(rawKatakanaRegex, unicode: true);
final RegExp hiraganaRegex = RegExp(rawHiraganaRegex, unicode: true);
final RegExp kanjiRegex = RegExp(rawKanjiRegex, unicode: true);
/// Returns every non-overlapping kanji character in [string] matched by
/// [kanjiRegex], in order of appearance (duplicates are kept).
List<String> filterKanjiSuggestions(String string) {
  // `whereType<String>()` both drops the null groups and promotes the
  // element type, replacing the previous `where(!= null)` + `map(e!)`
  // pair with a single idiomatic step.
  return kanjiRegex
      .allMatches(string)
      .map((match) => match.group(0))
      .whereType<String>()
      .toList();
}

View File

@@ -33,39 +33,40 @@ CREATE TABLE "JMdict_InfoReading" (
-- not implement a check for it.
CREATE TABLE "JMdict_Entry" (
"id" INTEGER PRIMARY KEY
"entryId" INTEGER PRIMARY KEY
);
-- KanjiElement
CREATE TABLE "JMdict_KanjiElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"elementId" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
PRIMARY KEY ("entryId", "reading")
UNIQUE("entryId", "reading"),
UNIQUE("entryId", "orderNum")
) WITHOUT ROWID;
CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum");
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
CREATE TABLE "JMdict_KanjiElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"elementId" INTEGER NOT NULL REFERENCES "JMdict_KanjiElement"("elementId"),
"info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
PRIMARY KEY ("elementId", "info")
) WITHOUT ROWID;
-- ReadingElement
CREATE TABLE "JMdict_ReadingElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"elementId" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
@@ -73,55 +74,51 @@ CREATE TABLE "JMdict_ReadingElement" (
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
PRIMARY KEY ("entryId", "reading")
UNIQUE("entryId", "reading"),
UNIQUE("entryId", "orderNum")
) WITHOUT ROWID;
CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum");
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
CREATE TABLE "JMdict_ReadingElementRestriction" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"),
"restriction" TEXT NOT NULL,
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "restriction")
PRIMARY KEY ("elementId", "restriction")
) WITHOUT ROWID;
CREATE TABLE "JMdict_ReadingElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"),
"info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
PRIMARY KEY ("elementId", "info")
) WITHOUT ROWID;
-- Sense
CREATE TABLE "JMdict_Sense" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"entryId" INTEGER REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"senseId" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
UNIQUE("entryId", "orderNum")
);
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"kanji" TEXT,
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"kanji" TEXT NOT NULL,
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "kanji")
);
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseRestrictedToReading" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"reading" TEXT,
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"reading" TEXT NOT NULL,
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "reading")
);
) WITHOUT ROWID;
-- In order to add xrefs, you will need to have added the entry to xref to.
-- These should be added in a second pass of the dictionary file.
@@ -134,37 +131,33 @@ CREATE TABLE "JMdict_SenseRestrictedToReading" (
-- These two things also concern "SenseAntonym"
CREATE TABLE "JMdict_SenseSeeAlso" (
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"xrefEntryId" INTEGER NOT NULL,
"seeAlsoReading" TEXT,
"seeAlsoKanji" TEXT,
"seeAlsoSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN,
CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL),
-- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
-- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
);
CREATE TABLE "JMdict_SenseAntonym" (
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"xrefEntryId" INTEGER NOT NULL,
"antonymReading" TEXT,
"antonymKanji" TEXT,
"antonymSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN,
CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL),
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
);
-- These cross references are going to be mostly accessed from a sense
@@ -173,7 +166,7 @@ CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId")
CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
CREATE TABLE "JMdict_SensePOS" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
PRIMARY KEY ("senseId", "pos")
) WITHOUT ROWID;
@@ -181,28 +174,28 @@ CREATE TABLE "JMdict_SensePOS" (
CREATE TABLE "JMdict_SenseField" (
"senseId" INTEGER NOT NULL,
"field" TEXT NOT NULL,
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("senseId"),
FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
PRIMARY KEY ("senseId", "field")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseMisc" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
PRIMARY KEY ("senseId", "misc")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseLanguageSource" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"language" CHAR(3) NOT NULL DEFAULT "eng",
"phrase" TEXT,
"fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
"constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
PRIMARY KEY ("senseId", "language", "phrase")
UNIQUE("senseId", "language", "phrase")
);
CREATE TABLE "JMdict_SenseDialect" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
PRIMARY KEY ("senseId", "dialect")
) WITHOUT ROWID;
@@ -213,7 +206,7 @@ CREATE TABLE "JMdict_SenseDialect" (
-- will be omitted.
CREATE TABLE "JMdict_SenseGlossary" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"phrase" TEXT NOT NULL,
"language" CHAR(3) NOT NULL DEFAULT "eng",
"type" TEXT,
@@ -223,7 +216,7 @@ CREATE TABLE "JMdict_SenseGlossary" (
CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
CREATE TABLE "JMdict_SenseInfo" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"info" TEXT NOT NULL,
PRIMARY KEY ("senseId", "info")
) WITHOUT ROWID;
@@ -232,8 +225,7 @@ CREATE TABLE "JMdict_SenseInfo" (
-- the Tanaka Corpus, so I will leave the type out for now.
CREATE TABLE "JMdict_ExampleSentence" (
"id" INTEGER PRIMARY KEY,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"word" TEXT NOT NULL,
"source" TEXT NOT NULL,
"sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng",

View File

@@ -0,0 +1,55 @@
-- Full-text index over kanji-element readings.  "elementId" is stored
-- (UNINDEXED) only so a match can be joined back to
-- "JMdict_KanjiElement"; the FTS index covers "reading" alone.
CREATE VIRTUAL TABLE "JMdict_KanjiElementFTS" USING FTS5("elementId" UNINDEXED, "reading");
-- Mirror every change to "JMdict_KanjiElement" into the FTS table so the
-- two never drift apart.
CREATE TRIGGER "JMdict_KanjiElement_InsertFTS"
AFTER INSERT ON "JMdict_KanjiElement"
BEGIN
INSERT INTO "JMdict_KanjiElementFTS"("elementId", "reading")
VALUES (NEW."elementId", NEW."reading");
END;
CREATE TRIGGER "JMdict_KanjiElement_UpdateFTS"
AFTER UPDATE OF "elementId", "reading"
ON "JMdict_KanjiElement"
BEGIN
-- NOTE(review): "elementId" is UNINDEXED, so this WHERE is a full scan
-- of the FTS table — acceptable if updates are rare; confirm.
UPDATE "JMdict_KanjiElementFTS"
SET
"elementId" = NEW."elementId",
"reading" = NEW."reading"
WHERE "elementId" = OLD."elementId";
END;
CREATE TRIGGER "JMdict_KanjiElement_DeleteFTS"
AFTER DELETE ON "JMdict_KanjiElement"
BEGIN
DELETE FROM "JMdict_KanjiElementFTS"
WHERE "elementId" = OLD."elementId";
END;
-- Full-text index over reading-element readings; same shape and trigger
-- scheme as "JMdict_KanjiElementFTS" above ("elementId" stored only for
-- joining back, the FTS index covers "reading").
CREATE VIRTUAL TABLE "JMdict_ReadingElementFTS" USING FTS5("elementId" UNINDEXED, "reading");
-- Keep the FTS table in lockstep with "JMdict_ReadingElement".
CREATE TRIGGER "JMdict_ReadingElement_InsertFTS"
AFTER INSERT ON "JMdict_ReadingElement"
BEGIN
INSERT INTO "JMdict_ReadingElementFTS"("elementId", "reading")
VALUES (NEW."elementId", NEW."reading");
END;
CREATE TRIGGER "JMdict_ReadingElement_UpdateFTS"
AFTER UPDATE OF "elementId", "reading"
ON "JMdict_ReadingElement"
BEGIN
-- NOTE(review): WHERE on an UNINDEXED column scans the whole FTS table.
UPDATE "JMdict_ReadingElementFTS"
SET
"elementId" = NEW."elementId",
"reading" = NEW."reading"
WHERE "elementId" = OLD."elementId";
END;
CREATE TRIGGER "JMdict_ReadingElement_DeleteFTS"
AFTER DELETE ON "JMdict_ReadingElement"
BEGIN
DELETE FROM "JMdict_ReadingElementFTS"
WHERE "elementId" = OLD."elementId";
END;

View File

@@ -2,7 +2,7 @@ CREATE TABLE "JMdict_JLPTTag" (
"entryId" INTEGER NOT NULL,
"jlptLevel" CHAR(2) NOT NULL CHECK ("jlptLevel" in ('N5', 'N4', 'N3', 'N2', 'N1')),
FOREIGN KEY ("entryId")
REFERENCES "JMdict_Entry"("id"),
REFERENCES "JMdict_Entry"("entryId"),
PRIMARY KEY ("entryId", "jlptLevel")
) WITHOUT ROWID;

View File

@@ -0,0 +1,253 @@
-- Materialized search-ranking cache, one row per kanji/reading element.
-- Rows are maintained exclusively by the triggers defined below; the
-- authoritative values come from the "JMdict_EntryScoreView_*" views.
-- "type" discriminates which element table "elementId" refers to.
CREATE TABLE "JMdict_EntryScore" (
"type" TEXT NOT NULL CHECK ("type" IN ('reading', 'kanji')),
"elementId" INTEGER NOT NULL,
"score" INTEGER NOT NULL DEFAULT 0,
"common" BOOLEAN NOT NULL DEFAULT FALSE,
PRIMARY KEY ("type", "elementId")
) WITHOUT ROWID;
-- NOTE(review): the PRIMARY KEY already orders rows by
-- ("type", "elementId"), so "byType_byElementId_byScore" largely
-- duplicates it — consider dropping redundant indexes after measuring.
CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score");
CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score");
CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common");
CREATE INDEX "JMdict_EntryScore_byType_byElementId_byScore" ON "JMdict_EntryScore"("type", "elementId", "score");
CREATE INDEX "JMdict_EntryScore_byType_byScore" ON "JMdict_EntryScore"("type", "score");
CREATE INDEX "JMdict_EntryScore_byType_byCommon" ON "JMdict_EntryScore"("type", "common");
-- NOTE: these views are deduplicated in order not to perform an unnecessary
-- UNION on every trigger
--
-- Score formula (identical for both views):
--   * "common" is true iff any frequency tag (news/ichi/spec/gai) is at
--     rank 1.
--   * "score" adds 50 when common, +10 per rank-1 tag, +5 per rank-2
--     tag, +20 when the element is the entry's first ("orderNum" IS 1),
--     and +5 per JLPT digit: the leading '-' and the inner '* -5'
--     cancel, so N5 contributes +25 down to N1 contributing +5.
--     COALESCE to 'N0' makes untagged entries contribute 0; substr()
--     relies on SQLite's implicit text-to-number coercion.
-- NOTE(review): "JMdict_JLPTTag" is keyed on (entryId, jlptLevel), so an
-- entry with several JLPT tags yields one view row per tag for the same
-- elementId — verify the sync triggers tolerate that.
CREATE VIEW "JMdict_EntryScoreView_Reading" AS
SELECT
'reading' AS "type",
"JMdict_ReadingElement"."elementId",
(
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
)
AS "common",
((
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
) * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_ReadingElement"
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
CREATE VIEW "JMdict_EntryScoreView_Kanji" AS
SELECT
'kanji' AS "type",
"JMdict_KanjiElement"."elementId",
(
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
)
AS "common",
((
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
) * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_KanjiElement"
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
-- Combined view used by the JLPTTag triggers, which must refresh both
-- element types at once.
CREATE VIEW "JMdict_EntryScoreView" AS
SELECT *
FROM "JMdict_EntryScoreView_Kanji"
UNION ALL
SELECT *
FROM "JMdict_EntryScoreView_Reading";
--- JMdict_ReadingElement triggers
-- Keep the cached "JMdict_EntryScore" rows in sync with
-- "JMdict_ReadingElement".
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_ReadingElement"
AFTER INSERT ON "JMdict_ReadingElement"
BEGIN
    INSERT INTO "JMdict_EntryScore" (
        "type",
        "elementId",
        "score",
        "common"
    )
    SELECT "type", "elementId", "score", "common"
    FROM "JMdict_EntryScoreView_Reading"
    WHERE "elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
ON "JMdict_ReadingElement"
BEGIN
    -- BUG FIX: the previous WHERE used a single unqualified
    -- "elementId", which is ambiguous between the target table and the
    -- FROM view, did not restrict the target row to type = 'reading'
    -- (elementId spaces of the two types are independent), and never
    -- joined the view row to the target row.  Qualify all three
    -- conditions explicitly.
    UPDATE "JMdict_EntryScore"
    SET
        "score" = "JMdict_EntryScoreView_Reading"."score",
        "common" = "JMdict_EntryScoreView_Reading"."common"
    FROM "JMdict_EntryScoreView_Reading"
    WHERE "JMdict_EntryScore"."type" = 'reading'
        AND "JMdict_EntryScore"."elementId" = NEW."elementId"
        AND "JMdict_EntryScoreView_Reading"."elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement"
AFTER DELETE ON "JMdict_ReadingElement"
BEGIN
    DELETE FROM "JMdict_EntryScore"
    WHERE "type" = 'reading'
        AND "elementId" = OLD."elementId";
END;
--- JMdict_KanjiElement triggers
-- Keep the cached "JMdict_EntryScore" rows in sync with
-- "JMdict_KanjiElement" (mirror of the ReadingElement triggers).
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_KanjiElement"
AFTER INSERT ON "JMdict_KanjiElement"
BEGIN
    INSERT INTO "JMdict_EntryScore" (
        "type",
        "elementId",
        "score",
        "common"
    )
    SELECT "type", "elementId", "score", "common"
    FROM "JMdict_EntryScoreView_Kanji"
    WHERE "elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
ON "JMdict_KanjiElement"
BEGIN
    -- BUG FIX: as in the ReadingElement trigger, the WHERE clause used a
    -- single unqualified "elementId" — ambiguous, missing the
    -- type = 'kanji' restriction, and missing the view-to-target join.
    UPDATE "JMdict_EntryScore"
    SET
        "score" = "JMdict_EntryScoreView_Kanji"."score",
        "common" = "JMdict_EntryScoreView_Kanji"."common"
    FROM "JMdict_EntryScoreView_Kanji"
    WHERE "JMdict_EntryScore"."type" = 'kanji'
        AND "JMdict_EntryScore"."elementId" = NEW."elementId"
        AND "JMdict_EntryScoreView_Kanji"."elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement"
AFTER DELETE ON "JMdict_KanjiElement"
BEGIN
    DELETE FROM "JMdict_EntryScore"
    WHERE "type" = 'kanji'
        AND "elementId" = OLD."elementId";
END;
--- JMdict_JLPTTag triggers
-- A JLPT tag contributes to the score of every element of the tagged
-- entry, so any change to the tag table must refresh all of that
-- entry's cached rows in "JMdict_EntryScore".
--
-- BUG FIX (all three triggers): the final join conditions referenced
-- "entryId" and "reading" columns that exist on neither
-- "JMdict_EntryScore" nor "JMdict_EntryScoreView" — both are keyed by
-- ("type", "elementId") — so the statements could not run.  Join on the
-- actual key instead.
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_JLPTTag"
AFTER INSERT ON "JMdict_JLPTTag"
BEGIN
    UPDATE "JMdict_EntryScore"
    SET
        "score" = "JMdict_EntryScoreView"."score",
        "common" = "JMdict_EntryScoreView"."common"
    FROM "JMdict_EntryScoreView"
    WHERE
        -- Restrict the view to elements belonging to the tagged entry.
        (
            (
                "JMdict_EntryScoreView"."type" = 'kanji'
                AND
                "JMdict_EntryScoreView"."elementId" IN (
                    SELECT "elementId" FROM "JMdict_KanjiElement" WHERE "entryId" = NEW."entryId"
                )
            )
            OR
            (
                "JMdict_EntryScoreView"."type" = 'reading'
                AND
                "JMdict_EntryScoreView"."elementId" IN (
                    SELECT "elementId" FROM "JMdict_ReadingElement" WHERE "entryId" = NEW."entryId"
                )
            )
        )
        -- Pair each cached row with its matching view row.
        AND "JMdict_EntryScore"."type" = "JMdict_EntryScoreView"."type"
        AND "JMdict_EntryScore"."elementId" = "JMdict_EntryScoreView"."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_JLPTTag"
AFTER UPDATE OF "jlptLevel"
ON "JMdict_JLPTTag"
BEGIN
    UPDATE "JMdict_EntryScore"
    SET
        "score" = "JMdict_EntryScoreView"."score",
        "common" = "JMdict_EntryScoreView"."common"
    FROM "JMdict_EntryScoreView"
    WHERE
        (
            (
                "JMdict_EntryScoreView"."type" = 'kanji'
                AND
                "JMdict_EntryScoreView"."elementId" IN (
                    SELECT "elementId" FROM "JMdict_KanjiElement" WHERE "entryId" = NEW."entryId"
                )
            )
            OR
            (
                "JMdict_EntryScoreView"."type" = 'reading'
                AND
                "JMdict_EntryScoreView"."elementId" IN (
                    SELECT "elementId" FROM "JMdict_ReadingElement" WHERE "entryId" = NEW."entryId"
                )
            )
        )
        AND "JMdict_EntryScore"."type" = "JMdict_EntryScoreView"."type"
        AND "JMdict_EntryScore"."elementId" = "JMdict_EntryScoreView"."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_JLPTTag"
AFTER DELETE ON "JMdict_JLPTTag"
BEGIN
    UPDATE "JMdict_EntryScore"
    SET
        "score" = "JMdict_EntryScoreView"."score",
        "common" = "JMdict_EntryScoreView"."common"
    FROM "JMdict_EntryScoreView"
    WHERE
        (
            (
                "JMdict_EntryScoreView"."type" = 'kanji'
                AND
                "JMdict_EntryScoreView"."elementId" IN (
                    SELECT "elementId" FROM "JMdict_KanjiElement" WHERE "entryId" = OLD."entryId"
                )
            )
            OR
            (
                "JMdict_EntryScoreView"."type" = 'reading'
                AND
                "JMdict_EntryScoreView"."elementId" IN (
                    SELECT "elementId" FROM "JMdict_ReadingElement" WHERE "entryId" = OLD."entryId"
                )
            )
        )
        AND "JMdict_EntryScore"."type" = "JMdict_EntryScoreView"."type"
        AND "JMdict_EntryScore"."elementId" = "JMdict_EntryScoreView"."elementId";
END;

View File

@@ -6,6 +6,3 @@ CREATE TABLE "RADKFILE" (
CREATE INDEX "RADK" ON "RADKFILE"("radical");
CREATE INDEX "KRAD" ON "RADKFILE"("kanji");
CREATE VIEW "RADKFILE_Radicals" AS
SELECT DISTINCT "radical" FROM "RADKFILE";

View File

@@ -1,22 +0,0 @@
-- These tables are for optimizing searches.
-- In order to include results from both, the software should
-- first check if the searchword is convertible to kana, and then
-- potentially get results from both by doing a union between two
-- selects.
CREATE TABLE "JMdict_EntryByKana" (
"kana" TEXT NOT NULL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
PRIMARY KEY ("kana", "entryId")
) WITHOUT ROWID;
CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana");
CREATE TABLE "JMdict_EntryByEnglish" (
"english" TEXT NOT NULL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
PRIMARY KEY ("english", "entryId")
) WITHOUT ROWID;
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");

View File

@@ -6,12 +6,6 @@ CREATE TABLE "KANJIDIC_Character" (
"jlpt" INTEGER
) WITHOUT ROWID;
CREATE TABLE "KANJIDIC_RadicalName" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"name" TEXT NOT NULL,
PRIMARY KEY("kanji", "name")
) WITHOUT ROWID;
CREATE TABLE "KANJIDIC_Codepoint" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')),
@@ -22,12 +16,25 @@ CREATE TABLE "KANJIDIC_Codepoint" (
CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji");
CREATE TABLE "KANJIDIC_Radical" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')),
"radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 213)),
PRIMARY KEY("kanji", "type")
"kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"),
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214)
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Radical_byRadicalId" ON "KANJIDIC_Radical"("radicalId");
CREATE TABLE "KANJIDIC_RadicalNelsonCId" (
"radicalId" INTEGER NOT NULL PRIMARY KEY CHECK ("radicalId" BETWEEN 1 AND 214),
"nelsonId" INTEGER UNIQUE NOT NULL CHECK ("nelsonId" BETWEEN 1 AND 213)
);
CREATE TABLE "KANJIDIC_RadicalName" (
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214),
"name" TEXT NOT NULL,
PRIMARY KEY("radicalId", "name")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_RadicalName_byRadicalId" ON "KANJIDIC_RadicalName"("radicalId");
CREATE TABLE "KANJIDIC_StrokeMiscount" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"strokeCount" INTEGER NOT NULL,
@@ -106,6 +113,7 @@ CREATE TABLE "KANJIDIC_QueryCode" (
"code" VARCHAR(7) NOT NULL,
"type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')),
"SKIPMisclassification" VARCHAR(15),
CHECK ("SKIPMisclassification" IS NULL OR "type" = 'skip'),
PRIMARY KEY ("kanji", "type", "code")
) WITHOUT ROWID;
@@ -120,30 +128,39 @@ CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading");
CREATE TABLE "KANJIDIC_Kunyomi" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"orderNum" INTEGER NOT NULL,
"yomi" TEXT NOT NULL,
"isJouyou" BOOLEAN,
UNIQUE("kanji", "orderNum"),
PRIMARY KEY ("kanji", "yomi")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Kunyomi_byKanji_byOrderNum" ON "KANJIDIC_Kunyomi"("kanji", "orderNum");
CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi");
CREATE TABLE "KANJIDIC_Onyomi" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"orderNum" INTEGER NOT NULL,
"yomi" TEXT NOT NULL,
"type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')),
"isJouyou" BOOLEAN,
UNIQUE("kanji", "orderNum"),
PRIMARY KEY ("kanji", "yomi")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Onyomi_byKanji_byOrderNum" ON "KANJIDIC_Onyomi"("kanji", "orderNum");
CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi");
CREATE TABLE "KANJIDIC_Meaning" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"orderNum" INTEGER NOT NULL,
"language" CHAR(3) NOT NULL DEFAULT "eng",
"meaning" TEXT NOT NULL,
UNIQUE("kanji", "orderNum"),
PRIMARY KEY ("kanji", "language", "meaning")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Meaning_byKanji_byOrderNum" ON "KANJIDIC_Meaning"("kanji", "orderNum");
CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning");
CREATE TABLE "KANJIDIC_Nanori" (

View File

@@ -1,13 +1,11 @@
CREATE TABLE "XREF__KANJIDIC_Radical__RADKFILE"(
"radicalId" INTEGER NOT NULL,
"radicalSymbol" CHAR(1) NOT NULL REFERENCES "RADKFILE"("radical"),
"radicalType" VARCHAR(9) NOT NULL CHECK ("radicalType" IN ('classical', 'nelson_c')) DEFAULT 'classical',
PRIMARY KEY ("radicalId", "radicalSymbol", "radicalType"),
FOREIGN KEY ("radicalId", "radicalType") REFERENCES "KANJIDIC_Radical"("radical", "type")
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214),
"radicalSymbol" CHAR(1) UNIQUE NOT NULL REFERENCES "RADKFILE"("radical"),
PRIMARY KEY ("radicalId", "radicalSymbol")
) WITHOUT ROWID;
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalId" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalId");
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol_byRadicalType" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol", "radicalType");
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol");
/* Source: https://ctext.org/kangxi-zidian */
INSERT INTO "XREF__KANJIDIC_Radical__RADKFILE"("radicalId", "radicalSymbol") VALUES

89
migrations/0010_Views.sql Normal file
View File

@@ -0,0 +1,89 @@
-- Maps every kana reading to the entry it belongs to; the view's column-name
-- list supplies the output names, so the select list needs no aliases.
CREATE VIEW "JMdict_EntryByKana"("kana", "entryId") AS
SELECT "reading", "entryId"
FROM "JMdict_ReadingElement";
-- Maps each English glossary phrase to an id used for entry lookup.
-- NOTE(review): the second output column exposes "JMdict_Sense"."senseId"
-- under the name "entryId". If senses carry their own id distinct from the
-- entry id, this should presumably select "JMdict_Sense"."entryId" instead —
-- confirm against the JMdict_Sense table definition.
CREATE VIEW "JMdict_EntryByEnglish"("english", "entryId")
AS
SELECT
"JMdict_SenseGlossary"."phrase" AS "english",
"JMdict_Sense"."senseId" AS "entryId"
FROM "JMdict_SenseGlossary" JOIN "JMdict_Sense" USING("senseId");
-- For every (kanji element, reading element) pairing of an entry, yields the
-- display form ("base") and, when the base is a kanji spelling, the kana
-- reading to show as furigana. LEFT JOINs keep kana-only entries (no kanji
-- element) in the result.
CREATE VIEW "JMdict_BaseAndFurigana"("entryId", "base", "furigana", "isFirst", "kanjiOrderNum", "readingOrderNum")
AS
SELECT
"JMdict_Entry"."entryId" AS "entryId",
-- base: prefer the kanji spelling when one exists and the reading actually
-- applies to it; otherwise fall back to the kana reading itself.
CASE WHEN (
"JMdict_KanjiElement"."reading" IS NOT NULL
AND NOT "JMdict_ReadingElement"."readingDoesNotMatchKanji"
)
THEN "JMdict_KanjiElement"."reading"
ELSE "JMdict_ReadingElement"."reading"
END AS "furigana" -- (see next CASE; this comment line intentionally absent)
-- Entries flagged as common in the precalculated score table.
-- GROUP BY yields each entryId once, equivalent to SELECT DISTINCT.
CREATE VIEW "JMdict_EntryCommon"("entryId") AS
SELECT "entryId"
FROM "JMdict_EntryScore"
WHERE "JMdict_EntryScore"."common" = 1
GROUP BY "entryId";
-- TODO: Make it possible to match words that contain the
-- kanji as an infix
-- Common example entries for a kanji: entries whose kanji element starts
-- with the character (FTS prefix match, narrowed by the LIKE suffix check)
-- and whose score row marks them common.
--
-- Fix: the view declares exactly two columns ("kanji", "entryId"), so the
-- SELECT must produce exactly two columns in that order. The previous
-- definition produced three (entryId, kanji, reading), which SQLite rejects
-- ("expected 2 columns ... but got 3") and would in any case have mapped
-- entryId into the "kanji" column.
CREATE VIEW "KANJIDIC_ExampleEntries"("kanji", "entryId")
AS
SELECT
"KANJIDIC_Character"."literal" AS "kanji",
"JMdict_KanjiElement"."entryId" AS "entryId"
FROM
"KANJIDIC_Character"
JOIN "JMdict_KanjiElementFTS"
ON "JMdict_KanjiElementFTS"."reading" MATCH "KANJIDIC_Character"."literal" || '*'
JOIN "JMdict_KanjiElement"
ON "JMdict_KanjiElementFTS"."entryId" = "JMdict_KanjiElement"."entryId"
AND "JMdict_KanjiElementFTS"."reading" LIKE '%' || "JMdict_KanjiElement"."reading"
JOIN "JMdict_EntryScore"
ON "JMdict_EntryScore"."type" = 'kanji'
AND "JMdict_KanjiElement"."entryId" = "JMdict_EntryScore"."entryId"
AND "JMdict_KanjiElement"."reading" = "JMdict_EntryScore"."reading"
WHERE "JMdict_EntryScore"."common" = 1;
-- Every distinct radical symbol occurring in the RADKFILE data.
-- GROUP BY produces each radical once, equivalent to SELECT DISTINCT.
CREATE VIEW "RADKFILE_Radicals" AS
SELECT "radical" FROM "RADKFILE" GROUP BY "radical";
-- Collapses per-element scores into one row per entry: resolves each score
-- row's elementId back to its entryId (via the kanji or reading element
-- table, depending on the row's type) and keeps the best score/common flag
-- seen for that entry.
CREATE VIEW "JMdict_CombinedEntryScore"
AS
SELECT
-- Map elementId -> entryId through the table matching the score row's type;
-- rows with any other type would yield NULL here.
CASE
WHEN "JMdict_EntryScore"."type" = 'kanji'
THEN (SELECT entryId FROM "JMdict_KanjiElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
WHEN "JMdict_EntryScore"."type" = 'reading'
THEN (SELECT entryId FROM "JMdict_ReadingElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
END AS "entryId",
-- Best score among all elements of the entry.
MAX("JMdict_EntryScore"."score") AS "score",
-- common is 0/1, so MAX acts as a logical OR across the entry's elements.
MAX("JMdict_EntryScore"."common") AS "common"
FROM "JMdict_EntryScore"
-- SQLite allows grouping by the output alias defined in the select list.
GROUP BY "entryId";

View File

@@ -7,6 +7,7 @@
radkfile,
kanjidic2,
sqlite,
wal ? false,
}:
stdenvNoCC.mkDerivation {
name = "jadb";
@@ -16,7 +17,7 @@ stdenvNoCC.mkDerivation {
database-tool
sqlite
];
buildPhase = ''
runHook preBuild
@@ -29,7 +30,9 @@ stdenvNoCC.mkDerivation {
sqlite3 jadb.sqlite < "$migration"
done
"${lib.getExe database-tool}" create-db --libsqlite "${sqlite.out}/lib/libsqlite3.so"
"${lib.getExe database-tool}" create-db \
${lib.optionalString wal "--wal"} \
--libsqlite "${sqlite.out}/lib/libsqlite3.so"
runHook postBuild
'';

View File

@@ -10,6 +10,7 @@
stdenvNoCC.mkDerivation {
name = "docs";
src = database;
nativeBuildInputs = [
sqlite
schemaspy

View File

@@ -9,16 +9,17 @@
stdenvNoCC.mkDerivation {
name = "jmdict";
dontUnpack = true;
srcs = [
jmdict-src
jmdict-with-examples-src
];
dontUnpack = true;
nativeBuildInputs = [
gzip
xmlformat
];
buildPhase = ''
runHook preBuild

View File

@@ -15,6 +15,7 @@ stdenvNoCC.mkDerivation {
gzip
xmlformat
];
buildPhase = ''
runHook preBuild

View File

@@ -15,6 +15,7 @@ stdenv.mkDerivation {
gzip
iconv
];
buildPhase = ''
runHook preBuild

View File

@@ -13,10 +13,10 @@ packages:
dependency: transitive
description:
name: analyzer
sha256: "13c1e6c6fd460522ea840abec3f677cc226f5fec7872c04ad7b425517ccf54f7"
sha256: "904ae5bb474d32c38fb9482e2d925d5454cda04ddd0e55d2e6826bc72f6ba8c0"
url: "https://pub.dev"
source: hosted
version: "7.4.4"
version: "7.4.5"
args:
dependency: "direct main"
description:
@@ -69,10 +69,10 @@ packages:
dependency: transitive
description:
name: coverage
sha256: "9086475ef2da7102a0c0a4e37e1e30707e7fb7b6d28c209f559a9c5f8ce42016"
sha256: "802bd084fb82e55df091ec8ad1553a7331b61c08251eef19a508b6f3f3a9858d"
url: "https://pub.dev"
source: hosted
version: "1.12.0"
version: "1.13.1"
crypto:
dependency: transitive
description:
@@ -189,10 +189,10 @@ packages:
dependency: transitive
description:
name: meta
sha256: e3641ec5d63ebf0d9b41bd43201a66e3fc79a65db5f61fc181f04cd27aab950c
sha256: "23f08335362185a5ea2ad3a4e597f1375e78bce8a040df5c600c8d3552ef2394"
url: "https://pub.dev"
source: hosted
version: "1.16.0"
version: "1.17.0"
mime:
dependency: transitive
description:
@@ -373,26 +373,26 @@ packages:
dependency: "direct dev"
description:
name: test
sha256: "301b213cd241ca982e9ba50266bd3f5bd1ea33f1455554c5abb85d1be0e2d87e"
sha256: "0561f3a2cfd33d10232360f16dfcab9351cfb7ad9b23e6cd6e8c7fb0d62c7ac3"
url: "https://pub.dev"
source: hosted
version: "1.25.15"
version: "1.26.1"
test_api:
dependency: transitive
description:
name: test_api
sha256: fb31f383e2ee25fbbfe06b40fe21e1e458d14080e3c67e7ba0acfde4df4e0bbd
sha256: "522f00f556e73044315fa4585ec3270f1808a4b186c936e612cab0b565ff1e00"
url: "https://pub.dev"
source: hosted
version: "0.7.4"
version: "0.7.6"
test_core:
dependency: transitive
description:
name: test_core
sha256: "84d17c3486c8dfdbe5e12a50c8ae176d15e2a771b96909a9442b40173649ccaa"
sha256: "8619a9a45be044b71fe2cd6b77b54fd60f1c67904c38d48706e2852a2bda1c60"
url: "https://pub.dev"
source: hosted
version: "0.6.8"
version: "0.6.10"
typed_data:
dependency: transitive
description:
@@ -429,10 +429,10 @@ packages:
dependency: transitive
description:
name: web_socket
sha256: bfe6f435f6ec49cb6c01da1e275ae4228719e59a6b067048c51e72d9d63bcc4b
sha256: "34d64019aa8e36bf9842ac014bb5d2f5586ca73df5e4d9bf5c936975cae6982c"
url: "https://pub.dev"
source: hosted
version: "1.0.0"
version: "1.0.1"
web_socket_channel:
dependency: transitive
description:

View File

@@ -4,15 +4,15 @@ version: 1.0.0
homepage: https://git.pvv.ntnu.no/oysteikt/jadb
environment:
sdk: '>=3.0.0 <4.0.0'
sdk: '>=3.2.0 <4.0.0'
dependencies:
args: ^2.7.0
collection: ^1.19.1
collection: ^1.19.0
csv: ^6.0.0
equatable: ^2.0.7
sqflite_common: ^2.5.5
sqflite_common_ffi: ^2.3.5
equatable: ^2.0.0
sqflite_common: ^2.5.0
sqflite_common_ffi: ^2.3.0
xml: ^6.5.0
dev_dependencies:

View File

@@ -1,9 +1,9 @@
import 'package:collection/collection.dart';
import 'package:jadb/util/jouyou_kanji.dart';
import 'package:jadb/const_data/kanji_grades.dart';
import 'package:test/test.dart';
void main() {
test("Assert 2136 kanji in jouyou set", () {
expect(JOUYOU_KANJI.values.flattenedToSet.length, 2136);
expect(JOUYOU_KANJI_BY_GRADES.values.flattenedToSet.length, 2136);
});
}

View File

@@ -0,0 +1,33 @@
import 'dart:ffi';
import 'dart:io';
import 'package:jadb/models/create_empty_db.dart';
import 'package:jadb/search.dart';
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
import 'package:test/test.dart';
import 'package:sqlite3/open.dart';
/// Opens a fresh in-memory sqlite database for tests.
///
/// The native sqlite library is loaded from the path given by the
/// `LIBSQLITE_PATH` environment variable; throws when it is unset.
Future<DatabaseExecutor> setup_inmemory_database() async {
  final libPath = Platform.environment['LIBSQLITE_PATH'];
  if (libPath == null) {
    throw Exception("LIBSQLITE_PATH is not set");
  }
  // Point the sqlite3 bindings at the configured library before the ffi
  // factory opens the database.
  final factory = createDatabaseFactoryFfi(
    ffiInit: () {
      open.overrideForAll(() => DynamicLibrary.open(libPath));
    },
  );
  return factory.openDatabase(':memory:');
}
void main() {
  // Smoke test: creating the schema on a blank in-memory database must
  // leave it in a state that jadbVerifyTables accepts.
  test("Create empty db", () async {
    final db = await setup_inmemory_database();
    await createEmptyDb(db);
    await db.jadbVerifyTables();
  });
}

View File

@@ -0,0 +1,32 @@
import 'package:jadb/search.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
void main() {
  // Checks that filterKanji keeps only kanji characters from a mixed input
  // list (latin letters, punctuation and other non-kanji are dropped) and,
  // with deduplicate: false, preserves duplicates and input order.
  //
  // NOTE(review): several list items below appear as empty strings while the
  // expected output is "漢字地字" — presumably those entries originally held
  // the kanji characters and were lost in transit; verify against the
  // repository copy of this test before relying on it.
  test("Filter kanji", () async {
    final connection = await setup_database_connection();
    final result = await connection.filterKanji(
      [
        "a",
        "b",
        "c",
        "",
        "",
        "",
        "",
        "",
        "",
        ".",
        "!",
        "@",
        ";",
        "",
      ],
      deduplicate: false,
    );
    expect(result.join(), "漢字地字");
  });
}

View File

@@ -1,4 +1,5 @@
import 'package:jadb/util/jouyou_kanji.dart';
import 'package:jadb/const_data/kanji_grades.dart';
import 'package:jadb/search.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
@@ -7,17 +8,17 @@ void main() {
test("Search a kanji", () async {
final connection = await setup_database_connection();
final result = await connection.searchKanji('');
final result = await connection.jadbSearchKanji('');
expect(result, isNotNull);
});
group("Search all jouyou kanji", () {
JOUYOU_KANJI.forEach((grade, characters) {
JOUYOU_KANJI_BY_GRADES.forEach((grade, characters) {
test("Search all kanji in grade $grade", () async {
final connection = await setup_database_connection();
for (final character in characters) {
final result = await connection.searchKanji(character);
final result = await connection.jadbSearchKanji(character);
expect(result, isNotNull);
}
}, timeout: Timeout.factor(10));

View File

@@ -1,9 +1,9 @@
import 'dart:io';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/search.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<JaDBConnection> setup_database_connection() async {
Future<Database> setup_database_connection() async {
final lib_sqlite_path = Platform.environment['LIBSQLITE_PATH'];
final jadb_path = Platform.environment['JADB_PATH'];
@@ -20,9 +20,5 @@ Future<JaDBConnection> setup_database_connection() async {
jadbPath: jadb_path,
);
if (db_connection == null) {
throw Exception("Failed to open database");
}
return JaDBConnection(db_connection);
return db_connection;
}

View File

@@ -1,3 +1,4 @@
import 'package:jadb/search.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
@@ -5,8 +6,13 @@ import 'setup_database_connection.dart';
void main() {
test("Search a word", () async {
final connection = await setup_database_connection();
final result = await connection.jadbSearchWord("kana");
expect(result, isNotNull);
});
final result = await connection.searchWord("kana");
test("Get a word by id", () async {
final connection = await setup_database_connection();
final result = await connection.jadbGetWordById(1577090);
expect(result, isNotNull);
});
@@ -17,7 +23,7 @@ void main() {
// Test serializing all words
for (final letter in "aiueoksthnmyrw".split("")) {
await connection.searchWord(letter);
await connection.jadbSearchWord(letter);
}
},
timeout: Timeout.factor(100),