107 Commits

Author SHA1 Message Date
0d3ebc97f5 flake.lock: bump 2025-07-17 00:24:35 +02:00
bb68319527 treewide: add and apply a bunch of lints 2025-07-17 00:24:35 +02:00
2803db9c12 bin/query-word: fix default pagination 2025-07-16 18:32:47 +02:00
93b76ed660 word_search: include data for cross references 2025-07-16 18:32:28 +02:00
29a3a6aafb treewide: dart format 2025-07-16 15:23:04 +02:00
3a2adf0367 pubspec.{yaml,lock}: update deps 2025-07-15 21:32:42 +02:00
eae6e881a7 flake.lock: bump 2025-07-15 21:32:35 +02:00
0a3387e77a search: add function for fetching multiple kanji at once 2025-07-15 00:58:16 +02:00
f30465a33c search: add function for fetching multiple word entries by id at once 2025-07-15 00:52:25 +02:00
d9006a0767 word_search: fix count query 2025-07-13 20:34:39 +02:00
1e1761ab4d pubspec.{yaml,lock}: update deps 2025-07-13 20:15:13 +02:00
37d29fc6ad cli/query_word: add flags for pagination 2025-07-13 20:12:22 +02:00
60898fe9a2 word_search: fix pagination 2025-07-13 20:12:10 +02:00
5049157b02 cli/query_word: add --json flag 2025-07-13 16:27:11 +02:00
1868c6fb41 word_search: don't throw error on empty results 2025-07-09 14:57:19 +02:00
4ee21d98e2 flake.lock: bump 2025-07-08 20:37:16 +02:00
7247af19cb word_search: always order exact matches first 2025-07-07 13:27:50 +02:00
ac7deae608 word_search: remove duplicate results 2025-07-07 12:47:20 +02:00
7978b74f8d lib/{_data_ingestion/search}: store kanjidic onyomi as hiragana 2025-06-25 20:18:28 +02:00
50870f64a0 cli/query_kanji: remove -k flag, use arguments 2025-06-25 20:18:27 +02:00
62d77749e6 cli/query_word: allow querying with jmdict id 2025-06-25 20:18:27 +02:00
80b3610a72 Store type enum as CHAR(1) 2025-06-25 20:18:27 +02:00
54705c3c10 word_search: add TODO 2025-06-24 23:04:47 +02:00
c7134f0d06 flake.nix: filter src 2025-06-24 19:33:10 +02:00
aac9bf69f6 cli/create_db: return an erroneous exit on error 2025-06-24 19:33:09 +02:00
189d4a95cf test/word_search: cover more functionality 2025-06-24 19:33:09 +02:00
c32775ce7a use ids for {kanji,reading}Element tables 2025-06-24 19:33:02 +02:00
78f546fa28 models/createEmptyDb: init 2025-06-23 21:11:42 +02:00
e0a35bdef9 lib: fix file name style 2025-06-22 12:48:23 +02:00
8ec9771222 word_search: add '.' filter for FTS input 2025-06-20 23:27:15 +02:00
e2fe033bf4 word_search: fix english search, filter '%' 2025-06-20 23:23:09 +02:00
a6a48c196d word_search: filter input for FTS chars 2025-06-20 23:10:10 +02:00
26618cc06d test: remove invalid null check 2025-06-20 23:01:33 +02:00
b855a1dc48 filter_kanji: fix implementation, add test 2025-06-20 22:44:26 +02:00
f8813e0ae3 word_search: add function for retrieving single entry by id 2025-06-20 21:57:43 +02:00
bd0fee1b2d lib: move table_names to separate dir 2025-05-31 16:21:59 +02:00
42e7c95f59 lib: format 2025-05-27 20:02:53 +02:00
b25cc85afe lib/search: make JaDBConnection into extension, add verifyTables 2025-05-27 20:02:13 +02:00
ec14016ab5 lib/util/lemmatizer: init 2025-05-26 17:23:49 +02:00
6eee49d2d1 lib/search/word: order english queries by score 2025-05-23 15:28:00 +02:00
f819280268 migrations: combine score tables 2025-05-23 15:27:56 +02:00
03a8e11d91 JMdict_BaseAndFurigana: move isFirst column out of dart query 2025-05-22 19:57:46 +02:00
fdd15df344 lib/search/filter_kanji: init 2025-05-22 19:45:10 +02:00
0ea8331298 nix: clean up formatting 2025-05-22 16:57:07 +02:00
9215807b5c migrations: move RADKFILE view to Views migration 2025-05-22 16:57:07 +02:00
72a58bc299 migrations: fix file indices 2025-05-22 16:57:06 +02:00
c208ef75f2 migrations: precalculate word search score with triggers 2025-05-22 16:57:06 +02:00
bfcb2bfc97 lib/cli: allow specifying query for kanji/word 2025-05-22 16:57:06 +02:00
52a686ac29 lib/search/word: fix english sql query 2025-05-22 16:57:05 +02:00
8bff2c6799 KANJIDIC: add orderNum to yomi + meaning 2025-05-21 12:57:08 +02:00
b8eba03000 lib/search: use const table names 2025-05-21 12:38:25 +02:00
4d75bef208 migrations: create virtual col common on jmdict kanji/reading elements 2025-05-20 13:08:16 +02:00
08f25f6277 lib/models/jmdict_pos: add shortDescription 2025-05-20 00:03:32 +02:00
7fee0435e8 pubspec.lock: bump packages 2025-05-19 21:46:19 +02:00
73640d01f6 lib/search/kanji: make taughtIn into int? 2025-05-19 20:18:36 +02:00
2875f7a65f lib/search/kanji: allow empty names for radicals 2025-05-19 19:48:17 +02:00
155a3aa658 treewide: format 2025-05-19 16:40:37 +02:00
0351b7a9df lib/search: make optional word args available in public api 2025-05-19 16:40:36 +02:00
3cc61e6ba9 .gitignore: add /doc 2025-05-19 16:40:36 +02:00
ebe29db092 lib/search: document public search functions 2025-05-19 16:40:36 +02:00
128dd14a0d lib/search: generate list of ? instead of interpolation 2025-05-19 16:40:35 +02:00
501d3a089e lib/search/radical_search: implement 2025-05-19 16:40:35 +02:00
e0ffd89ff4 lib/search/word_search: use map comprehensions 2025-05-19 16:40:35 +02:00
e30ed8ba9b lib/search/kanji: add more data to result 2025-05-19 16:40:34 +02:00
d508b5e244 migrations: add constraint for kanjidic skip misclassifications 2025-05-19 16:40:34 +02:00
31c3fb807e treewide: redo handling of kanjidic radicals 2025-05-19 16:40:34 +02:00
60d2017473 lib/search/kanji: add codepoints 2025-05-19 16:40:33 +02:00
135d81db80 lib/search/kanji: query kanji parts 2025-05-19 16:40:33 +02:00
f8549bf55a lib/_data_ingestion/kanjidic: correctly parse radical names from xml 2025-05-19 16:40:33 +02:00
69d86b34ff migrations: remove already fixed TODO 2025-05-19 16:40:30 +02:00
6d7c068e7b lib/search/word_search: query reading/kanji elements in correct order 2025-05-18 23:53:30 +02:00
b6661c734f lib/search/word_search: add word count search 2025-05-16 23:50:01 +02:00
080638e7ef lib/search/word_search: assert no missing rubys 2025-05-16 21:30:33 +02:00
03d536d7d1 nix: add package for database with WAL enabled 2025-05-16 21:29:53 +02:00
3f267b78d3 lib: format 2025-05-16 21:06:19 +02:00
84ae7eca9e lib/search/word_search: move some score calculation to virtual column 2025-05-16 21:03:37 +02:00
45c4c5f09a lib/cli/query-word: stringify 2025-05-16 18:47:26 +02:00
369fcdbd4b lib/search/word_search: fix rubification 2025-05-16 18:46:57 +02:00
33cf5028f4 lib/search/word_search: score and order results by several metrics 2025-05-16 18:46:39 +02:00
90d5717928 migrations: move JMdict views to 0009_views 2025-05-16 18:43:58 +02:00
bb9550380c lib/search/word_search: limit entry id query to single column 2025-05-16 17:06:01 +02:00
3680827176 lib/search/word_search: use const table names 2025-05-16 17:06:01 +02:00
9d9ce840fa lib/search/word_search: split 2025-05-16 17:06:00 +02:00
6c580e95e2 lib/search/word_search: pagination 2025-05-16 17:06:00 +02:00
2db99e76f0 lib/_data_ingestion: perform VACUUM after data ingestion 2025-05-16 17:06:00 +02:00
42db69e57a lib/cli/create_db: make WAL mode optional 2025-05-16 17:05:59 +02:00
4407c06f12 lib/cli: add timing logs to query commands 2025-05-16 17:05:59 +02:00
fc0956d5c3 lib/text_filtering: add kanjiRegex 2025-05-16 17:05:57 +02:00
d7f7f9cd19 Filter data out into separate files 2025-05-16 09:22:28 +02:00
cf95f85caa Add kanji filtering utility 2025-05-16 09:17:18 +02:00
f278b34415 lib/_data_ingestion: enable WAL 2025-05-15 22:31:54 +02:00
30d8160698 migrations: add FTS tables + view for querying example words 2025-05-15 13:03:48 +02:00
b07fc8f4b3 lib/word_search: calculate isCommon 2025-05-14 20:59:57 +02:00
8299572225 migrations: add view for base/furigana queries 2025-05-14 17:12:31 +02:00
78ba1bae1a lib/search: query readings for xrefs 2025-05-14 17:12:31 +02:00
87383c8951 lib/search: query jlpt tags on word search 2025-05-14 17:12:31 +02:00
cd9b318956 lib/cli/create_db: await seeding data 2025-05-14 17:12:30 +02:00
96f52b5860 lib/_data_ingestion/tanos-jlpt: report sql errors 2025-05-14 17:12:30 +02:00
59e8db5add Query more detailed information about language source 2025-05-14 17:12:30 +02:00
9038119eb7 Consistently use senseId name everywhere 2025-05-14 17:12:29 +02:00
3290d5dc91 Consistently use entryId name everywhere 2025-05-14 17:12:29 +02:00
4647ab2286 flake.nix: add sqlite-analyzer to devshell 2025-05-14 17:12:29 +02:00
2980bcda06 lib/_data_ingestion/jmdict: format 2025-05-14 17:12:29 +02:00
1661817819 migrations/JMDict: add extra constraints, clean up pkeys, etc. 2025-05-14 17:12:28 +02:00
581f9daf25 lib/_data_ingestion: add phrases for language source data 2025-05-14 17:12:28 +02:00
9898793bca Convert entryBy* tables into views 2025-05-14 17:12:28 +02:00
2e7e8851e1 pubspec.yaml: relax deps 2025-05-13 22:19:02 +02:00
92 changed files with 6545 additions and 3838 deletions

1
.gitignore vendored
View File

@@ -6,6 +6,7 @@
.packages
# Conventional directory for build output.
/doc/
/build/
main.db

41
analysis_options.yaml Normal file
View File

@@ -0,0 +1,41 @@
# This file configures the analyzer, which statically analyzes Dart code to
# check for errors, warnings, and lints.
#
# The issues identified by the analyzer are surfaced in the UI of Dart-enabled
# IDEs (https://dart.dev/tools#ides-and-editors). The analyzer can also be
# invoked from the command line by running `flutter analyze`.
# The following line activates a set of recommended lints for Flutter apps,
# packages, and plugins designed to encourage good coding practices.
include:
- package:lints/recommended.yaml
linter:
# The lint rules applied to this project can be customized in the
# section below to disable rules from the `package:flutter_lints/flutter.yaml`
# included above or to enable additional rules. A list of all available lints
# and their documentation is published at https://dart.dev/lints.
#
# Instead of disabling a lint rule for the entire project in the
# section below, it can also be suppressed for a single line of code
# or a specific dart file by using the `// ignore: name_of_lint` and
# `// ignore_for_file: name_of_lint` syntax on the line or in the file
# producing the lint.
rules:
always_declare_return_types: true
annotate_redeclares: true
avoid_print: false
avoid_setters_without_getters: true
avoid_slow_async_io: true
directives_ordering: true
eol_at_end_of_file: true
prefer_const_declarations: true
prefer_contains: true
prefer_final_fields: true
prefer_final_locals: true
prefer_single_quotes: true
use_key_in_widget_constructors: true
use_null_aware_elements: true
# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options

View File

@@ -2,18 +2,20 @@ import 'package:args/command_runner.dart';
import 'package:jadb/cli/commands/create_db.dart';
import 'package:jadb/cli/commands/create_tanos_jlpt_mappings.dart';
import 'package:jadb/cli/commands/lemmatize.dart';
import 'package:jadb/cli/commands/query_kanji.dart';
import 'package:jadb/cli/commands/query_word.dart';
Future<void> main(List<String> args) async {
final runner = CommandRunner(
'jadb',
"CLI tool to help creating and testing the jadb database",
'CLI tool to help creating and testing the jadb database',
);
runner.addCommand(CreateDb());
runner.addCommand(QueryKanji());
runner.addCommand(QueryWord());
runner.addCommand(Lemmatize());
runner.addCommand(CreateTanosJlptMappings());
runner.run(args);

12
flake.lock generated
View File

@@ -3,7 +3,7 @@
"jmdict-src": {
"flake": false,
"locked": {
"narHash": "sha256-84P7r/fFlBnawy6yChrD9WMHmOWcEGWUmoK70N4rdGQ=",
"narHash": "sha256-5Y4ySJadyNF/Ckjv9rEjIpLnoN0YpbN+cvOawqiuo5Y=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
},
@@ -15,7 +15,7 @@
"jmdict-with-examples-src": {
"flake": false,
"locked": {
"narHash": "sha256-PM0sv7VcsCya2Ek02CI7hVwB3Jawn6bICSI+dsJK0yo=",
"narHash": "sha256-/lOum1C/0zuq9W+g/TajsOgkTeai8vW4ubUdfX8ahX0=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
},
@@ -27,7 +27,7 @@
"kanjidic2-src": {
"flake": false,
"locked": {
"narHash": "sha256-Lc0wUPpuDKuMDv2t87//w3z20RX8SMJI2iIRtUJ8fn0=",
"narHash": "sha256-2RCsAsosBjMAgTzmd8YLa5qP9HIVy6wP4DoMNy1LCKM=",
"type": "file",
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
},
@@ -38,11 +38,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1746904237,
"narHash": "sha256-3e+AVBczosP5dCLQmMoMEogM57gmZ2qrVSrmq9aResQ=",
"lastModified": 1752480373,
"narHash": "sha256-JHQbm+OcGp32wAsXTE/FLYGNpb+4GLi5oTvCxwSoBOA=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d89fc19e405cb2d55ce7cc114356846a0ee5e956",
"rev": "62e0f05ede1da0d54515d4ea8ce9c733f12d9f08",
"type": "github"
},
"original": {

View File

@@ -81,6 +81,7 @@
dart
gnumake
sqlite-interactive
sqlite-analyzer
sqlite-web
sqlint
sqlfluff
@@ -103,10 +104,24 @@
platforms = lib.platforms.all;
};
src = lib.cleanSource ./.;
src = builtins.filterSource (path: type: let
baseName = baseNameOf (toString path);
in !(lib.any (b: b) [
(!(lib.cleanSourceFilter path type))
(baseName == ".github" && type == "directory")
(baseName == "nix" && type == "directory")
(baseName == ".envrc" && type == "regular")
(baseName == "flake.lock" && type == "regular")
(baseName == "flake.nix" && type == "regular")
])) ./.;
in forAllSystems (system: pkgs: {
default = self.packages.${system}.database;
filteredSource = pkgs.runCommandLocal "filtered-source" { } ''
ln -s ${src} $out
'';
jmdict = pkgs.callPackage ./nix/jmdict.nix {
inherit jmdict-src jmdict-with-examples-src edrdgMetadata;
};
@@ -128,6 +143,12 @@
inherit src;
};
database-wal = pkgs.callPackage ./nix/database.nix {
inherit (self.packages.${system}) database-tool jmdict radkfile kanjidic2;
inherit src;
wal = true;
};
docs = pkgs.callPackage ./nix/docs.nix {
inherit (self.packages.${system}) database;
};

View File

@@ -16,14 +16,15 @@ abstract class Element extends SQLWritable {
this.nf,
});
@override
Map<String, Object?> get sqlValue => {
'reading': reading,
'news': news,
'ichi': ichi,
'spec': spec,
'gai': gai,
'nf': nf,
};
'reading': reading,
'news': news,
'ichi': ichi,
'spec': spec,
'gai': gai,
'nf': nf,
};
}
class KanjiElement extends Element {
@@ -33,26 +34,19 @@ class KanjiElement extends Element {
KanjiElement({
this.info = const [],
required this.orderNum,
required String reading,
int? news,
int? ichi,
int? spec,
int? gai,
int? nf,
}) : super(
reading: reading,
news: news,
ichi: ichi,
spec: spec,
gai: gai,
nf: nf,
);
required super.reading,
super.news,
super.ichi,
super.spec,
super.gai,
super.nf,
});
@override
Map<String, Object?> get sqlValue => {
...super.sqlValue,
'orderNum': orderNum,
};
...super.sqlValue,
'orderNum': orderNum,
};
}
class ReadingElement extends Element {
@@ -66,27 +60,20 @@ class ReadingElement extends Element {
required this.readingDoesNotMatchKanji,
this.info = const [],
this.restrictions = const [],
required String reading,
int? news,
int? ichi,
int? spec,
int? gai,
int? nf,
}) : super(
reading: reading,
news: news,
ichi: ichi,
spec: spec,
gai: gai,
nf: nf,
);
required super.reading,
super.news,
super.ichi,
super.spec,
super.gai,
super.nf,
});
@override
Map<String, Object?> get sqlValue => {
...super.sqlValue,
'orderNum': orderNum,
'readingDoesNotMatchKanji': readingDoesNotMatchKanji,
};
...super.sqlValue,
'orderNum': orderNum,
'readingDoesNotMatchKanji': readingDoesNotMatchKanji,
};
}
class LanguageSource extends SQLWritable {
@@ -97,18 +84,18 @@ class LanguageSource extends SQLWritable {
const LanguageSource({
required this.language,
this.phrase,
required this.phrase,
this.fullyDescribesSense = true,
this.constructedFromSmallerWords = false,
});
@override
Map<String, Object?> get sqlValue => {
'language': language,
'phrase': phrase,
'fullyDescribesSense': fullyDescribesSense,
'constructedFromSmallerWords': constructedFromSmallerWords,
};
'language': language,
'phrase': phrase,
'fullyDescribesSense': fullyDescribesSense,
'constructedFromSmallerWords': constructedFromSmallerWords,
};
}
class Glossary extends SQLWritable {
@@ -116,52 +103,45 @@ class Glossary extends SQLWritable {
final String phrase;
final String? type;
const Glossary({
required this.language,
required this.phrase,
this.type,
});
const Glossary({required this.language, required this.phrase, this.type});
@override
Map<String, Object?> get sqlValue => {
'language': language,
'phrase': phrase,
'type': type,
};
'language': language,
'phrase': phrase,
'type': type,
};
}
final kanaRegex =
RegExp(r'^[\p{Script=Katakana}\p{Script=Hiragana}ー]+$', unicode: true);
final kanaRegex = RegExp(
r'^[\p{Script=Katakana}\p{Script=Hiragana}ー]+$',
unicode: true,
);
class XRefParts {
final String? kanjiRef;
final String? readingRef;
final int? senseOrderNum;
const XRefParts({
this.kanjiRef,
this.readingRef,
this.senseOrderNum,
}) : assert(kanjiRef != null || readingRef != null);
const XRefParts({this.kanjiRef, this.readingRef, this.senseOrderNum})
: assert(kanjiRef != null || readingRef != null);
Map<String, Object?> toJson() => {
'kanjiRef': kanjiRef,
'readingRef': readingRef,
'senseOrderNum': senseOrderNum,
};
'kanjiRef': kanjiRef,
'readingRef': readingRef,
'senseOrderNum': senseOrderNum,
};
}
class XRef {
final String entryId;
final String reading;
const XRef({
required this.entryId,
required this.reading,
});
const XRef({required this.entryId, required this.reading});
}
class Sense extends SQLWritable {
final int id;
final int senseId;
final int orderNum;
final List<XRefParts> antonyms;
final List<String> dialects;
@@ -176,7 +156,7 @@ class Sense extends SQLWritable {
final List<XRefParts> seeAlso;
const Sense({
required this.id,
required this.senseId,
required this.orderNum,
this.antonyms = const [],
this.dialects = const [],
@@ -193,11 +173,12 @@ class Sense extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'id': id,
'orderNum': orderNum,
};
'senseId': senseId,
'orderNum': orderNum,
};
bool get isEmpty => antonyms.isEmpty &&
bool get isEmpty =>
antonyms.isEmpty &&
dialects.isEmpty &&
fields.isEmpty &&
info.isEmpty &&
@@ -211,17 +192,18 @@ class Sense extends SQLWritable {
}
class Entry extends SQLWritable {
final int id;
final int entryId;
final List<KanjiElement> kanji;
final List<ReadingElement> readings;
final List<Sense> senses;
const Entry({
required this.id,
required this.entryId,
required this.kanji,
required this.readings,
required this.senses,
});
Map<String, Object?> get sqlValue => {'id': id};
@override
Map<String, Object?> get sqlValue => {'entryId': entryId};
}

View File

@@ -2,8 +2,7 @@ import 'dart:collection';
import 'package:collection/collection.dart';
import 'package:jadb/_data_ingestion/jmdict/objects.dart';
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/util/romaji_transliteration.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:sqflite_common/sqlite_api.dart';
class ResolvedXref {
@@ -19,18 +18,20 @@ ResolvedXref resolveXref(
XRefParts xref,
) {
List<Entry> candidateEntries = switch ((xref.kanjiRef, xref.readingRef)) {
(null, null) =>
throw Exception('Xref $xref has no kanji or reading reference'),
(String k, null) => entriesByKanji[k]!.toList(),
(null, String r) => entriesByReading[r]!.toList(),
(String k, String r) =>
(null, null) => throw Exception(
'Xref $xref has no kanji or reading reference',
),
(final String k, null) => entriesByKanji[k]!.toList(),
(null, final String r) => entriesByReading[r]!.toList(),
(final String k, final String r) =>
entriesByKanji[k]!.intersection(entriesByReading[r]!).toList(),
};
// Filter out entries that don't have the number of senses specified in the xref
if (xref.senseOrderNum != null) {
candidateEntries
.retainWhere((entry) => entry.senses.length >= xref.senseOrderNum!);
candidateEntries.retainWhere(
(entry) => entry.senses.length >= xref.senseOrderNum!,
);
}
// If the xref has a reading ref but no kanji ref, and there are multiple
@@ -39,8 +40,9 @@ ResolvedXref resolveXref(
if (xref.kanjiRef == null &&
xref.readingRef != null &&
candidateEntries.length > 1) {
final candidatesWithEmptyKanji =
candidateEntries.where((entry) => entry.kanji.length == 0).toList();
final candidatesWithEmptyKanji = candidateEntries
.where((entry) => entry.kanji.isEmpty)
.toList();
if (candidatesWithEmptyKanji.isNotEmpty) {
candidateEntries = candidatesWithEmptyKanji;
@@ -51,7 +53,7 @@ ResolvedXref resolveXref(
// entry in case there are multiple candidates left.
candidateEntries.sortBy<num>((entry) => entry.senses.length);
if (candidateEntries.length == 0) {
if (candidateEntries.isEmpty) {
throw Exception(
'SKIPPING: Xref $xref has ${candidateEntries.length} entries, '
'kanjiRef: ${xref.kanjiRef}, readingRef: ${xref.readingRef}, '
@@ -72,75 +74,44 @@ ResolvedXref resolveXref(
Future<void> seedJMDictData(List<Entry> entries, Database db) async {
print(' [JMdict] Batch 1 - Kanji and readings');
Batch b = db.batch();
int elementId = 0;
for (final e in entries) {
b.insert(JMdictTableNames.entry, e.sqlValue);
for (final k in e.kanji) {
b.insert(JMdictTableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id}));
// b.insert(
// JMdictTableNames.entryByKana,
// {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)},
// // Some entries have the same reading twice with difference in katakana and hiragana
// conflictAlgorithm: ConflictAlgorithm.ignore,
// );
elementId++;
b.insert(
JMdictTableNames.kanjiElement,
k.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
);
for (final i in k.info) {
b.insert(
JMdictTableNames.kanjiInfo,
{
'entryId': e.id,
'reading': k.reading,
'info': i,
},
);
b.insert(JMdictTableNames.kanjiInfo, {
'elementId': elementId,
'info': i,
});
}
}
for (final r in e.readings) {
elementId++;
b.insert(
JMdictTableNames.readingElement,
r.sqlValue..addAll({'entryId': e.id}),
r.sqlValue..addAll({'entryId': e.entryId, 'elementId': elementId}),
);
b.insert(
JMdictTableNames.entryByKana,
{
'entryId': e.id,
'kana': transliterateKanaToLatin(r.reading),
},
// Some entries have the same reading twice with difference in katakana and hiragana
conflictAlgorithm: ConflictAlgorithm.ignore,
);
for (final i in r.info) {
b.insert(
JMdictTableNames.readingInfo,
{
'entryId': e.id,
'reading': r.reading,
'info': i,
},
);
b.insert(JMdictTableNames.readingInfo, {
'elementId': elementId,
'info': i,
});
}
for (final res in r.restrictions) {
b.insert(
JMdictTableNames.readingRestriction,
{
'entryId': e.id,
'reading': r.reading,
'restriction': res,
},
);
}
}
for (final s in e.senses) {
for (final g in s.glossary) {
b.insert(
JMdictTableNames.entryByEnglish,
{
'entryId': e.id,
'english': g.phrase,
},
// Some entries have the same reading twice with difference in katakana and hiragana
conflictAlgorithm: ConflictAlgorithm.ignore,
);
b.insert(JMdictTableNames.readingRestriction, {
'elementId': elementId,
'restriction': res,
});
}
}
}
@@ -149,52 +120,58 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
print(' [JMdict] Batch 2 - Senses');
b = db.batch();
for (final e in entries) {
for (final s in e.senses) {
b.insert(JMdictTableNames.sense, s.sqlValue..addAll({'entryId': e.id}));
b.insert(
JMdictTableNames.sense,
s.sqlValue..addAll({'entryId': e.entryId}),
);
for (final d in s.dialects) {
b.insert(JMdictTableNames.senseDialect, {'senseId': s.id, 'dialect': d});
b.insert(JMdictTableNames.senseDialect, {
'senseId': s.senseId,
'dialect': d,
});
}
for (final f in s.fields) {
b.insert(JMdictTableNames.senseField, {'senseId': s.id, 'field': f});
b.insert(JMdictTableNames.senseField, {
'senseId': s.senseId,
'field': f,
});
}
for (final i in s.info) {
b.insert(JMdictTableNames.senseInfo, {'senseId': s.id, 'info': i});
b.insert(JMdictTableNames.senseInfo, {'senseId': s.senseId, 'info': i});
}
for (final m in s.misc) {
b.insert(JMdictTableNames.senseMisc, {'senseId': s.id, 'misc': m});
b.insert(JMdictTableNames.senseMisc, {'senseId': s.senseId, 'misc': m});
}
for (final p in s.pos) {
b.insert(JMdictTableNames.sensePOS, {'senseId': s.id, 'pos': p});
}
for (final l in s.languageSource) {
b.insert(
JMdictTableNames.senseLanguageSource,
l.sqlValue..addAll({'senseId': s.id}),
);
b.insert(JMdictTableNames.sensePOS, {'senseId': s.senseId, 'pos': p});
}
for (final rk in s.restrictedToKanji) {
b.insert(
JMdictTableNames.senseRestrictedToKanji,
{'entryId': e.id, 'senseId': s.id, 'kanji': rk},
);
b.insert(JMdictTableNames.senseRestrictedToKanji, {
'entryId': e.entryId,
'senseId': s.senseId,
'kanji': rk,
});
}
for (final rr in s.restrictedToReading) {
b.insert(
JMdictTableNames.senseRestrictedToReading,
{'entryId': e.id, 'senseId': s.id, 'reading': rr},
);
b.insert(JMdictTableNames.senseRestrictedToReading, {
'entryId': e.entryId,
'senseId': s.senseId,
'reading': rr,
});
}
for (final ls in s.languageSource) {
b.insert(
JMdictTableNames.senseLanguageSource,
ls.sqlValue..addAll({'senseId': s.id}),
ls.sqlValue..addAll({'senseId': s.senseId}),
);
}
for (final g in s.glossary) {
b.insert(
JMdictTableNames.senseGlossary,
g.sqlValue..addAll({'senseId': s.id}),
g.sqlValue..addAll({'senseId': s.senseId}),
);
}
}
@@ -203,7 +180,8 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
await b.commit(noResult: true);
print(' [JMdict] Building xref trees');
SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
final SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
for (final entry in entries) {
for (final kanji in entry.kanji) {
if (entriesByKanji.containsKey(kanji.reading)) {
@@ -213,7 +191,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
}
}
}
SplayTreeMap<String, Set<Entry>> entriesByReading = SplayTreeMap();
final SplayTreeMap<String, Set<Entry>> entriesByReading = SplayTreeMap();
for (final entry in entries) {
for (final reading in entry.readings) {
if (entriesByReading.containsKey(reading.reading)) {
@@ -236,17 +214,14 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
xref,
);
b.insert(
JMdictTableNames.senseSeeAlso,
{
'senseId': s.id,
'xrefEntryId': resolvedEntry.entry.id,
'seeAlsoKanji': xref.kanjiRef,
'seeAlsoReading': xref.readingRef,
'seeAlsoSense': xref.senseOrderNum,
'ambiguous': resolvedEntry.ambiguous,
},
);
b.insert(JMdictTableNames.senseSeeAlso, {
'senseId': s.senseId,
'xrefEntryId': resolvedEntry.entry.entryId,
'seeAlsoKanji': xref.kanjiRef,
'seeAlsoReading': xref.readingRef,
'seeAlsoSense': xref.senseOrderNum,
'ambiguous': resolvedEntry.ambiguous,
});
}
for (final ant in s.antonyms) {
@@ -257,8 +232,8 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
);
b.insert(JMdictTableNames.senseAntonyms, {
'senseId': s.id,
'xrefEntryId': resolvedEntry.entry.id,
'senseId': s.senseId,
'xrefEntryId': resolvedEntry.entry.entryId,
'antonymKanji': ant.kanjiRef,
'antonymReading': ant.readingRef,
'antonymSense': ant.senseOrderNum,

View File

@@ -8,15 +8,16 @@ List<int?> getPriorityValues(XmlElement e, String prefix) {
int? news, ichi, spec, gai, nf;
for (final pri in e.findElements('${prefix}_pri')) {
final txt = pri.innerText;
if (txt.startsWith('news'))
if (txt.startsWith('news')) {
news = int.parse(txt.substring(4));
else if (txt.startsWith('ichi'))
} else if (txt.startsWith('ichi'))
ichi = int.parse(txt.substring(4));
else if (txt.startsWith('spec'))
spec = int.parse(txt.substring(4));
else if (txt.startsWith('gai'))
gai = int.parse(txt.substring(3));
else if (txt.startsWith('nf')) nf = int.parse(txt.substring(2));
else if (txt.startsWith('nf'))
nf = int.parse(txt.substring(2));
}
return [news, ichi, spec, gai, nf];
}
@@ -46,10 +47,7 @@ XRefParts parseXrefParts(String s) {
);
}
} else {
result = XRefParts(
kanjiRef: parts[0],
readingRef: parts[1],
);
result = XRefParts(kanjiRef: parts[0], readingRef: parts[1]);
}
break;
@@ -82,7 +80,7 @@ List<Entry> parseJMDictData(XmlElement root) {
final List<Sense> senses = [];
for (final (kanjiNum, k_ele) in entry.findElements('k_ele').indexed) {
final ke_pri = getPriorityValues(k_ele, 'ke');
final kePri = getPriorityValues(k_ele, 'ke');
kanjiEls.add(
KanjiElement(
orderNum: kanjiNum + 1,
@@ -91,19 +89,20 @@ List<Entry> parseJMDictData(XmlElement root) {
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
reading: k_ele.findElements('keb').first.innerText,
news: ke_pri[0],
ichi: ke_pri[1],
spec: ke_pri[2],
gai: ke_pri[3],
nf: ke_pri[4],
news: kePri[0],
ichi: kePri[1],
spec: kePri[2],
gai: kePri[3],
nf: kePri[4],
),
);
}
for (final (orderNum, r_ele) in entry.findElements('r_ele').indexed) {
final re_pri = getPriorityValues(r_ele, 're');
final readingDoesNotMatchKanji =
r_ele.findElements('re_nokanji').isNotEmpty;
final rePri = getPriorityValues(r_ele, 're');
final readingDoesNotMatchKanji = r_ele
.findElements('re_nokanji')
.isNotEmpty;
readingEls.add(
ReadingElement(
orderNum: orderNum + 1,
@@ -112,14 +111,16 @@ List<Entry> parseJMDictData(XmlElement root) {
.findElements('re_inf')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(),
restrictions:
r_ele.findElements('re_restr').map((e) => e.innerText).toList(),
restrictions: r_ele
.findElements('re_restr')
.map((e) => e.innerText)
.toList(),
reading: r_ele.findElements('reb').first.innerText,
news: re_pri[0],
ichi: re_pri[1],
spec: re_pri[2],
gai: re_pri[3],
nf: re_pri[4],
news: rePri[0],
ichi: rePri[1],
spec: rePri[2],
gai: rePri[3],
nf: rePri[4],
),
);
}
@@ -127,12 +128,16 @@ List<Entry> parseJMDictData(XmlElement root) {
for (final (orderNum, sense) in entry.findElements('sense').indexed) {
senseId++;
final result = Sense(
id: senseId,
senseId: senseId,
orderNum: orderNum + 1,
restrictedToKanji:
sense.findElements('stagk').map((e) => e.innerText).toList(),
restrictedToReading:
sense.findElements('stagr').map((e) => e.innerText).toList(),
restrictedToKanji: sense
.findElements('stagk')
.map((e) => e.innerText)
.toList(),
restrictedToReading: sense
.findElements('stagr')
.map((e) => e.innerText)
.toList(),
pos: sense
.findElements('pos')
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
@@ -151,6 +156,7 @@ List<Entry> parseJMDictData(XmlElement root) {
.map(
(e) => LanguageSource(
language: e.getAttribute('xml:lang') ?? 'eng',
phrase: e.innerText.isNotEmpty ? e.innerText : null,
fullyDescribesSense: e.getAttribute('ls_type') == 'part',
constructedFromSmallerWords: e.getAttribute('ls_wasei') == 'y',
),
@@ -189,7 +195,7 @@ List<Entry> parseJMDictData(XmlElement root) {
entries.add(
Entry(
id: entryId,
entryId: entryId,
kanji: kanjiEls,
readings: readingEls,
senses: senses,

View File

@@ -13,45 +13,33 @@ class CodePoint extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'type': type,
'codepoint': codepoint,
};
'kanji': kanji,
'type': type,
'codepoint': codepoint,
};
}
class Radical extends SQLWritable {
final String kanji;
final String type;
final String radical;
final int radicalId;
const Radical({
required this.kanji,
required this.type,
required this.radical,
});
const Radical({required this.kanji, required this.radicalId});
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'type': type,
'radical': radical,
};
Map<String, Object?> get sqlValue => {'kanji': kanji, 'radicalId': radicalId};
}
class StrokeMiscount extends SQLWritable {
final String kanji;
final int strokeCount;
const StrokeMiscount({
required this.kanji,
required this.strokeCount,
});
const StrokeMiscount({required this.kanji, required this.strokeCount});
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'strokeCount': strokeCount,
};
'kanji': kanji,
'strokeCount': strokeCount,
};
}
class Variant extends SQLWritable {
@@ -67,10 +55,10 @@ class Variant extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'type': type,
'variant': variant,
};
'kanji': kanji,
'type': type,
'variant': variant,
};
}
class DictionaryReference extends SQLWritable {
@@ -86,10 +74,10 @@ class DictionaryReference extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'type': type,
'ref': ref,
};
'kanji': kanji,
'type': type,
'ref': ref,
};
}
class DictionaryReferenceMoro extends SQLWritable {
@@ -107,11 +95,11 @@ class DictionaryReferenceMoro extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'ref': ref,
'volume': volume,
'page': page,
};
'kanji': kanji,
'ref': ref,
'volume': volume,
'page': page,
};
}
class QueryCode extends SQLWritable {
@@ -129,11 +117,11 @@ class QueryCode extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'code': code,
'type': type,
'skipMisclassification': skipMisclassification,
};
'kanji': kanji,
'code': code,
'type': type,
'skipMisclassification': skipMisclassification,
};
}
class Reading extends SQLWritable {
@@ -149,10 +137,10 @@ class Reading extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'type': type,
'reading': reading,
};
'kanji': kanji,
'type': type,
'reading': reading,
};
}
class Kunyomi extends SQLWritable {
@@ -168,10 +156,10 @@ class Kunyomi extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'yomi': yomi,
'isJouyou': isJouyou,
};
'kanji': kanji,
'yomi': yomi,
'isJouyou': isJouyou,
};
}
class Onyomi extends SQLWritable {
@@ -189,11 +177,11 @@ class Onyomi extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'yomi': yomi,
'isJouyou': isJouyou,
'type': type,
};
'kanji': kanji,
'yomi': yomi,
'isJouyou': isJouyou,
'type': type,
};
}
class Meaning extends SQLWritable {
@@ -209,10 +197,10 @@ class Meaning extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'kanji': kanji,
'language': language,
'meaning': meaning,
};
'kanji': kanji,
'language': language,
'meaning': meaning,
};
}
class Character extends SQLWritable {
@@ -224,7 +212,7 @@ class Character extends SQLWritable {
final List<String> radicalName;
final List<CodePoint> codepoints;
final List<Radical> radicals;
final Radical? radical;
final List<int> strokeMiscounts;
final List<Variant> variants;
final List<DictionaryReference> dictionaryReferences;
@@ -244,7 +232,7 @@ class Character extends SQLWritable {
this.jlpt,
this.radicalName = const [],
this.codepoints = const [],
this.radicals = const [],
required this.radical,
this.strokeMiscounts = const [],
this.variants = const [],
this.dictionaryReferences = const [],
@@ -257,11 +245,12 @@ class Character extends SQLWritable {
this.nanori = const [],
});
@override
Map<String, Object?> get sqlValue => {
'literal': literal,
'grade': grade,
'strokeCount': strokeCount,
'frequency': frequency,
'jlpt': jlpt,
};
'literal': literal,
'grade': grade,
'strokeCount': strokeCount,
'frequency': frequency,
'jlpt': jlpt,
};
}

View File

@@ -1,4 +1,4 @@
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:sqflite_common/sqlite_api.dart';
import 'objects.dart';
@@ -14,23 +14,27 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
// print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
// }
b.insert(KANJIDICTableNames.character, c.sqlValue);
for (final n in c.radicalName) {
b.insert(KANJIDICTableNames.radicalName, {'kanji': c.literal, 'name': n});
assert(c.radical != null, 'Radical name without radical');
b.insert(
KANJIDICTableNames.radicalName,
{'radicalId': c.radical!.radicalId, 'name': n},
conflictAlgorithm: ConflictAlgorithm.ignore,
);
}
for (final cp in c.codepoints) {
b.insert(KANJIDICTableNames.codepoint, cp.sqlValue);
}
for (final r in c.radicals) {
b.insert(KANJIDICTableNames.radical, r.sqlValue);
if (c.radical != null) {
b.insert(KANJIDICTableNames.radical, c.radical!.sqlValue);
}
for (final sm in c.strokeMiscounts) {
b.insert(
KANJIDICTableNames.strokeMiscount,
{
'kanji': c.literal,
'strokeCount': sm,
},
);
b.insert(KANJIDICTableNames.strokeMiscount, {
'kanji': c.literal,
'strokeCount': sm,
});
}
for (final v in c.variants) {
b.insert(KANJIDICTableNames.variant, v.sqlValue);
@@ -52,23 +56,26 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
for (final r in c.readings) {
b.insert(KANJIDICTableNames.reading, r.sqlValue);
}
for (final k in c.kunyomi) {
b.insert(KANJIDICTableNames.kunyomi, k.sqlValue);
for (final (i, y) in c.kunyomi.indexed) {
b.insert(
KANJIDICTableNames.kunyomi,
y.sqlValue..addAll({'orderNum': i + 1}),
);
}
for (final o in c.onyomi) {
b.insert(KANJIDICTableNames.onyomi, o.sqlValue);
for (final (i, y) in c.onyomi.indexed) {
b.insert(
KANJIDICTableNames.onyomi,
y.sqlValue..addAll({'orderNum': i + 1}),
);
}
for (final m in c.meanings) {
b.insert(KANJIDICTableNames.meaning, m.sqlValue);
for (final (i, m) in c.meanings.indexed) {
b.insert(
KANJIDICTableNames.meaning,
m.sqlValue..addAll({'orderNum': i + 1}),
);
}
for (final n in c.nanori) {
b.insert(
KANJIDICTableNames.nanori,
{
'kanji': c.literal,
'nanori': n,
},
);
b.insert(KANJIDICTableNames.nanori, {'kanji': c.literal, 'nanori': n});
}
}
await b.commit(noResult: true);

View File

@@ -1,4 +1,5 @@
import 'package:jadb/_data_ingestion/kanjidic/objects.dart';
import 'package:jadb/util/romaji_transliteration.dart';
import 'package:xml/xml.dart';
List<Character> parseKANJIDICData(XmlElement root) {
@@ -9,30 +10,33 @@ List<Character> parseKANJIDICData(XmlElement root) {
final codepoint = c.findElements('codepoint').firstOrNull;
final radical = c.findElements('radical').firstOrNull;
final misc = c.findElements('misc').first;
final dic_number = c.findElements('dic_number').firstOrNull;
final query_code = c.findElements('query_code').first;
final reading_meaning = c.findElements('reading_meaning').firstOrNull;
final dicNumber = c.findElements('dic_number').firstOrNull;
final queryCode = c.findElements('query_code').first;
final readingMeaning = c.findElements('reading_meaning').firstOrNull;
// TODO: Group readings and meanings by their rmgroup parent node.
result.add(
Character(
literal: kanji,
strokeCount:
int.parse(misc.findElements('stroke_count').first.innerText),
strokeCount: int.parse(
misc.findElements('stroke_count').first.innerText,
),
grade: int.tryParse(
misc.findElements('grade').firstOrNull?.innerText ?? ''),
misc.findElements('grade').firstOrNull?.innerText ?? '',
),
frequency: int.tryParse(
misc.findElements('freq').firstOrNull?.innerText ?? ''),
misc.findElements('freq').firstOrNull?.innerText ?? '',
),
jlpt: int.tryParse(
misc.findElements('jlpt').firstOrNull?.innerText ?? '',
),
radicalName: radical
?.findElements('rad_name')
.map((e) => e.innerText)
.toList() ??
[],
codepoints: codepoint
radicalName: misc
.findElements('rad_name')
.map((e) => e.innerText)
.toList(),
codepoints:
codepoint
?.findElements('cp_value')
.map(
(e) => CodePoint(
@@ -43,17 +47,13 @@ List<Character> parseKANJIDICData(XmlElement root) {
)
.toList() ??
[],
radicals: radical
?.findElements('rad_value')
.map(
(e) => Radical(
kanji: kanji,
type: e.getAttribute('rad_type')!,
radical: e.innerText,
),
)
.toList() ??
[],
radical: radical
?.findElements('rad_value')
.where((e) => e.getAttribute('rad_type') == 'classical')
.map(
(e) => Radical(kanji: kanji, radicalId: int.parse(e.innerText)),
)
.firstOrNull,
strokeMiscounts: misc
.findElements('stroke_count')
.skip(1)
@@ -69,7 +69,8 @@ List<Character> parseKANJIDICData(XmlElement root) {
),
)
.toList(),
dictionaryReferences: dic_number
dictionaryReferences:
dicNumber
?.findElements('dic_ref')
.where((e) => e.getAttribute('dr_type') != 'moro')
.map(
@@ -81,7 +82,8 @@ List<Character> parseKANJIDICData(XmlElement root) {
)
.toList() ??
[],
dictionaryReferencesMoro: dic_number
dictionaryReferencesMoro:
dicNumber
?.findElements('dic_ref')
.where((e) => e.getAttribute('dr_type') == 'moro')
.map(
@@ -94,7 +96,7 @@ List<Character> parseKANJIDICData(XmlElement root) {
)
.toList() ??
[],
querycodes: query_code
querycodes: queryCode
.findElements('q_code')
.map(
(e) => QueryCode(
@@ -105,7 +107,8 @@ List<Character> parseKANJIDICData(XmlElement root) {
),
)
.toList(),
readings: reading_meaning
readings:
readingMeaning
?.findAllElements('reading')
.where(
(e) =>
@@ -120,7 +123,8 @@ List<Character> parseKANJIDICData(XmlElement root) {
)
.toList() ??
[],
kunyomi: reading_meaning
kunyomi:
readingMeaning
?.findAllElements('reading')
.where((e) => e.getAttribute('r_type') == 'ja_kun')
.map(
@@ -132,19 +136,22 @@ List<Character> parseKANJIDICData(XmlElement root) {
)
.toList() ??
[],
onyomi: reading_meaning
onyomi:
readingMeaning
?.findAllElements('reading')
.where((e) => e.getAttribute('r_type') == 'ja_on')
.map(
(e) => Onyomi(
kanji: kanji,
yomi: e.innerText,
isJouyou: e.getAttribute('r_status') == 'jy',
type: e.getAttribute('on_type')),
kanji: kanji,
yomi: transliterateKatakanaToHiragana(e.innerText),
isJouyou: e.getAttribute('r_status') == 'jy',
type: e.getAttribute('on_type'),
),
)
.toList() ??
[],
meanings: reading_meaning
meanings:
readingMeaning
?.findAllElements('meaning')
.map(
(e) => Meaning(
@@ -155,7 +162,8 @@ List<Character> parseKANJIDICData(XmlElement root) {
)
.toList() ??
[],
nanori: reading_meaning
nanori:
readingMeaning
?.findElements('nanori')
.map((e) => e.innerText)
.toList() ??

View File

@@ -1,10 +1,7 @@
import 'dart:ffi';
import 'dart:io';
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
import 'package:jadb/search.dart';
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
import 'package:sqlite3/open.dart';
@@ -12,7 +9,8 @@ Future<Database> openLocalDb({
String? libsqlitePath,
String? jadbPath,
bool readWrite = false,
bool assertTablesExist = true,
bool verifyTablesExist = true,
bool walMode = false,
}) async {
libsqlitePath ??= Platform.environment['LIBSQLITE_PATH'];
jadbPath ??= Platform.environment['JADB_PATH'];
@@ -24,67 +22,37 @@ Future<Database> openLocalDb({
jadbPath = File(jadbPath).resolveSymbolicLinksSync();
if (libsqlitePath == null) {
throw Exception("LIBSQLITE_PATH is not set");
throw Exception('LIBSQLITE_PATH is not set');
}
if (!File(libsqlitePath).existsSync()) {
throw Exception("LIBSQLITE_PATH does not exist: $libsqlitePath");
throw Exception('LIBSQLITE_PATH does not exist: $libsqlitePath');
}
if (!File(jadbPath).existsSync()) {
throw Exception("JADB_PATH does not exist: $jadbPath");
throw Exception('JADB_PATH does not exist: $jadbPath');
}
final db = await createDatabaseFactoryFfi(
ffiInit: () =>
open.overrideForAll(() => DynamicLibrary.open(libsqlitePath!)),
).openDatabase(
jadbPath,
options: OpenDatabaseOptions(
onOpen: (db) {
db.execute("PRAGMA foreign_keys = ON");
},
readOnly: !readWrite,
),
);
final db =
await createDatabaseFactoryFfi(
ffiInit: () =>
open.overrideForAll(() => DynamicLibrary.open(libsqlitePath!)),
).openDatabase(
jadbPath,
options: OpenDatabaseOptions(
onConfigure: (db) async {
if (walMode) {
await db.execute('PRAGMA journal_mode = WAL');
}
await db.execute('PRAGMA foreign_keys = ON');
},
readOnly: !readWrite,
),
);
if (assertTablesExist) {
await _assertTablesExist(db);
if (verifyTablesExist) {
await db.jadbVerifyTables();
}
return db;
}
Future<void> _assertTablesExist(Database db) async {
final Set<String> tables = await db
.query(
'sqlite_master',
columns: ['name'],
where: 'type = ?',
whereArgs: ['table'],
)
.then((result) {
return result.map((row) => row['name'] as String).toSet();
});
final Set<String> expectedTables = {
...JMdictTableNames.allTables,
...KANJIDICTableNames.allTables,
...RADKFILETableNames.allTables,
...TanosJLPTTableNames.allTables,
};
final missingTables = expectedTables.difference(tables);
if (missingTables.isNotEmpty) {
throw Exception([
'Missing tables:',
missingTables.map((table) => ' - $table').join('\n'),
'',
'Found tables:\n',
tables.map((table) => ' - $table').join('\n'),
'',
'Please ensure the database is correctly set up.',
].join('\n'));
}
}

View File

@@ -3,8 +3,10 @@ import 'dart:io';
Iterable<String> parseRADKFILEBlocks(File radkfile) {
final String content = File('data/tmp/radkfile_utf8').readAsStringSync();
final Iterable<String> blocks =
content.replaceAll(RegExp(r'^#.*$'), '').split(r'$').skip(2);
final Iterable<String> blocks = content
.replaceAll(RegExp(r'^#.*$'), '')
.split(r'$')
.skip(2);
return blocks;
}

View File

@@ -1,27 +1,20 @@
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<void> seedRADKFILEData(
Iterable<String> blocks,
Database db,
) async {
Future<void> seedRADKFILEData(Iterable<String> blocks, Database db) async {
final b = db.batch();
for (final block in blocks) {
final String radical = block[1];
final List<String> kanjiList = block
.replaceFirst(RegExp(r'.*\n'), '')
.split('')
..removeWhere((e) => e == '' || e == '\n');
final List<String> kanjiList =
block.replaceFirst(RegExp(r'.*\n'), '').split('')
..removeWhere((e) => e == '' || e == '\n');
for (final kanji in kanjiList.toSet()) {
b.insert(
RADKFILETableNames.radkfile,
{
'radical': radical,
'kanji': kanji,
},
);
b.insert(RADKFILETableNames.radkfile, {
'radical': radical,
'kanji': kanji,
});
}
}

View File

@@ -17,14 +17,17 @@ Future<void> seedData(Database db) async {
await parseAndSeedDataFromRADKFILE(db);
await parseAndSeedDataFromKANJIDIC(db);
await parseAndSeedDataFromTanosJLPT(db);
print('Performing VACUUM');
await db.execute('VACUUM');
}
Future<void> parseAndSeedDataFromJMdict(Database db) async {
print('[JMdict] Reading file content...');
String rawXML = File('data/tmp/JMdict.xml').readAsStringSync();
final String rawXML = File('data/tmp/JMdict.xml').readAsStringSync();
print('[JMdict] Parsing XML tags...');
XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!;
final XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!;
print('[JMdict] Parsing XML content...');
final entries = parseJMDictData(root);
@@ -35,10 +38,10 @@ Future<void> parseAndSeedDataFromJMdict(Database db) async {
Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
print('[KANJIDIC2] Reading file...');
String rawXML = File('data/tmp/kanjidic2.xml').readAsStringSync();
final String rawXML = File('data/tmp/kanjidic2.xml').readAsStringSync();
print('[KANJIDIC2] Parsing XML...');
XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;
final XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;
print('[KANJIDIC2] Parsing XML content...');
final entries = parseKANJIDICData(root);
@@ -49,7 +52,7 @@ Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
print('[RADKFILE] Reading file...');
File raw = File('data/tmp/RADKFILE');
final File raw = File('data/tmp/RADKFILE');
print('[RADKFILE] Parsing content...');
final blocks = parseRADKFILEBlocks(raw);
@@ -60,7 +63,7 @@ Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
print('[TANOS-JLPT] Reading files...');
Map<String, File> files = {
final Map<String, File> files = {
'N1': File('data/tanos-jlpt/n1.csv'),
'N2': File('data/tanos-jlpt/n2.csv'),
'N3': File('data/tanos-jlpt/n3.csv'),

View File

@@ -3,7 +3,7 @@ abstract class SQLWritable {
const SQLWritable();
/// Returns a map of the object's properties and their values.
///
///
/// Note that there might be properties in the object which is meant to be
/// inserted into a different table. These properties will/should be excluded
/// from this map.

View File

@@ -14,7 +14,7 @@ Future<List<JLPTRankedWord>> parseJLPTRankedWords(
final file = entry.value;
if (!file.existsSync()) {
throw Exception("File $jlptLevel does not exist");
throw Exception('File $jlptLevel does not exist');
}
final rows = await file
@@ -25,29 +25,33 @@ Future<List<JLPTRankedWord>> parseJLPTRankedWords(
for (final row in rows) {
if (row.length != 3) {
throw Exception("Invalid line in $jlptLevel: $row");
throw Exception('Invalid line in $jlptLevel: $row');
}
final kanji = (row[0] as String).isEmpty
? null
: (row[0] as String)
.replaceFirst(RegExp('^お・'), '')
.replaceAll(RegExp(r'.*'), '');
.replaceFirst(RegExp('^お・'), '')
.replaceAll(RegExp(r'.*'), '');
final readings = (row[1] as String)
.split(RegExp('[・/、(:?\s+)]'))
.split(RegExp('[・/、(:?s+)]'))
.map((e) => e.trim())
.toList();
final meanings =
(row[2] as String).split(',').expand(cleanMeaning).toList();
final meanings = (row[2] as String)
.split(',')
.expand(cleanMeaning)
.toList();
result.add(JLPTRankedWord(
readings: readings,
kanji: kanji,
jlptLevel: jlptLevel,
meanings: meanings,
));
result.add(
JLPTRankedWord(
readings: readings,
kanji: kanji,
jlptLevel: jlptLevel,
meanings: meanings,
),
);
}
}

View File

@@ -13,5 +13,5 @@ class JLPTRankedWord {
@override
String toString() =>
'(${jlptLevel},${kanji},"${readings.join(",")}","${meanings.join(",")})';
'($jlptLevel,$kanji,"${readings.join(",")}","${meanings.join(",")})';
}

View File

@@ -1,47 +1,43 @@
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/overrides.dart';
import 'package:jadb/util/sqlite_utils.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<List<int>> _findReadingCandidates(
JLPTRankedWord word,
Database db,
) =>
db
.query(
JMdictTableNames.readingElement,
columns: ['entryId'],
where:
'reading IN (${word.readings.map((e) => escapeStringValue(e)).join(',')})',
)
.then((rows) => rows.map((row) => row['entryId'] as int).toList());
Future<List<int>> _findReadingCandidates(JLPTRankedWord word, Database db) => db
.query(
JMdictTableNames.readingElement,
columns: ['entryId'],
where:
'"reading" IN (${List.filled(word.readings.length, '?').join(',')})',
whereArgs: [...word.readings],
)
.then((rows) => rows.map((row) => row['entryId'] as int).toList());
Future<List<int>> _findKanjiCandidates(
JLPTRankedWord word,
Database db,
) =>
db
.query(
JMdictTableNames.kanjiElement,
columns: ['entryId'],
where: 'reading = ?',
whereArgs: [word.kanji],
)
.then((rows) => rows.map((row) => row['entryId'] as int).toList());
Future<List<int>> _findKanjiCandidates(JLPTRankedWord word, Database db) => db
.query(
JMdictTableNames.kanjiElement,
columns: ['entryId'],
where: 'reading = ?',
whereArgs: [word.kanji],
)
.then((rows) => rows.map((row) => row['entryId'] as int).toList());
Future<List<(int, String)>> _findSenseCandidates(
JLPTRankedWord word,
Database db,
) =>
db
.rawQuery('SELECT entryId, phrase '
'FROM ${JMdictTableNames.senseGlossary} '
'JOIN ${JMdictTableNames.sense} ON ${JMdictTableNames.senseGlossary}.senseId = ${JMdictTableNames.sense}.id '
'WHERE phrase IN (${word.meanings.map((e) => escapeStringValue(e)).join(',')})')
.then((rows) => rows
.map((row) => (row['entryId'] as int, row['phrase'] as String))
.toList());
) => db
.rawQuery(
'SELECT entryId, phrase '
'FROM "${JMdictTableNames.senseGlossary}" '
'JOIN "${JMdictTableNames.sense}" USING (senseId)'
'WHERE phrase IN (${List.filled(word.meanings.length, '?').join(',')})',
[...word.meanings],
)
.then(
(rows) => rows
.map((row) => (row['entryId'] as int, row['phrase'] as String))
.toList(),
);
Future<int?> findEntry(
JLPTRankedWord word,
@@ -49,8 +45,10 @@ Future<int?> findEntry(
bool useOverrides = true,
}) async {
final List<int> readingCandidates = await _findReadingCandidates(word, db);
final List<(int, String)> senseCandidates =
await _findSenseCandidates(word, db);
final List<(int, String)> senseCandidates = await _findSenseCandidates(
word,
db,
);
List<int> entryIds;
@@ -65,8 +63,10 @@ Future<int?> findEntry(
print('No entry found, trying to combine with senses');
entryIds = readingCandidates
.where((readingId) =>
senseCandidates.any((sense) => sense.$1 == readingId))
.where(
(readingId) =>
senseCandidates.any((sense) => sense.$1 == readingId),
)
.toList();
}
} else {
@@ -82,12 +82,15 @@ Future<int?> findEntry(
if (overrideEntries.length > 1) {
throw Exception(
'Multiple override entries found for ${word.toString()}: $entryIds');
} else if (overrideEntries.length == 0 &&
!word.readings.any((reading) =>
TANOS_JLPT_OVERRIDES.containsKey((word.kanji, reading)))) {
'Multiple override entries found for ${word.toString()}: $entryIds',
);
} else if (overrideEntries.isEmpty &&
!word.readings.any(
(reading) => TANOS_JLPT_OVERRIDES.containsKey((word.kanji, reading)),
)) {
throw Exception(
'No override entry found for ${word.toString()}: $entryIds');
'No override entry found for ${word.toString()}: $entryIds',
);
}
print('Found override: ${overrideEntries.firstOrNull}');
@@ -97,7 +100,8 @@ Future<int?> findEntry(
if (entryIds.length > 1) {
throw Exception(
'Multiple override entries found for ${word.toString()}: $entryIds');
'Multiple override entries found for ${word.toString()}: $entryIds',
);
} else if (entryIds.isEmpty) {
throw Exception('No entry found for ${word.toString()}');
}
@@ -123,7 +127,9 @@ Future<Map<String, Set<int>>> resolveAllEntries(
if (resolved != null) {
result[word.jlptLevel]!.add(resolved);
}
} catch (e) {}
} catch (e) {
print('ERROR: $e');
}
}
return result;

View File

@@ -1,24 +1,21 @@
import 'package:jadb/_data_ingestion/tanos-jlpt/table_names.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<void> seedTanosJLPTData(
Map<String, Set<int>> resolvedEntries,
Database db,
) async {
Batch b = db.batch();
final Batch b = db.batch();
for (final jlptLevel in resolvedEntries.entries) {
final level = jlptLevel.key;
final entryIds = jlptLevel.value;
for (final entryId in entryIds) {
b.insert(
TanosJLPTTableNames.jlptTag,
{
'entryId': entryId,
'jlptLevel': level,
},
);
b.insert(TanosJLPTTableNames.jlptTag, {
'entryId': entryId,
'jlptLevel': level,
});
}
}

View File

@@ -1,19 +1,30 @@
import 'dart:io';
import 'package:args/command_runner.dart';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/_data_ingestion/seed_database.dart';
import 'package:args/command_runner.dart';
import 'package:jadb/cli/args.dart';
class CreateDb extends Command {
final name = "create-db";
final description = "Create the database";
@override
final name = 'create-db';
@override
final description = 'Create the database';
CreateDb() {
addLibsqliteArg(argParser);
argParser.addFlag(
'wal',
help: '''Whether to use Write-Ahead Logging (WAL) mode.
This is recommended for better performance, but may not be used with
the readonly NixOS store.
''',
defaultsTo: false,
);
}
@override
Future<void> run() async {
if (argResults!.option('libsqlite') == null) {
print(argParser.usage);
@@ -22,15 +33,26 @@ class CreateDb extends Command {
final db = await openLocalDb(
libsqlitePath: argResults!.option('libsqlite')!,
walMode: argResults!.flag('wal'),
readWrite: true,
);
seedData(db).then((_) {
print("Database created successfully");
}).catchError((error) {
print("Error creating database: $error");
}).whenComplete(() {
db.close();
});
bool failed = false;
await seedData(db)
.then((_) {
print('Database created successfully');
})
.catchError((error) {
print('Error creating database: $error');
failed = true;
})
.whenComplete(() {
db.close();
});
if (failed) {
exit(1);
} else {
exit(0);
}
}
}

View File

@@ -1,8 +1,7 @@
import 'dart:io';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:args/command_runner.dart';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
@@ -10,9 +9,11 @@ import 'package:jadb/cli/args.dart';
import 'package:sqflite_common/sqlite_api.dart';
class CreateTanosJlptMappings extends Command {
final name = "create-tanos-jlpt-mappings";
@override
final name = 'create-tanos-jlpt-mappings';
@override
final description =
"Resolve Tanos JLPT data against JMDict. This tool is useful to create overrides for ambiguous references";
'Resolve Tanos JLPT data against JMDict. This tool is useful to create overrides for ambiguous references';
CreateTanosJlptMappings() {
addLibsqliteArg(argParser);
@@ -26,6 +27,7 @@ class CreateTanosJlptMappings extends Command {
);
}
@override
Future<void> run() async {
if (argResults!.option('libsqlite') == null ||
argResults!.option('jadb') == null) {
@@ -40,7 +42,7 @@ class CreateTanosJlptMappings extends Command {
final useOverrides = argResults!.flag('overrides');
Map<String, File> files = {
final Map<String, File> files = {
'N1': File('data/tanos-jlpt/n1.csv'),
'N2': File('data/tanos-jlpt/n2.csv'),
'N3': File('data/tanos-jlpt/n3.csv'),
@@ -59,11 +61,12 @@ Future<void> resolveExisting(
Database db,
bool useOverrides,
) async {
List<JLPTRankedWord> missingWords = [];
final List<JLPTRankedWord> missingWords = [];
for (final (i, word) in rankedWords.indexed) {
try {
print(
'[${(i + 1).toString().padLeft(4, '0')}/${rankedWords.length}] ${word.toString()}');
'[${(i + 1).toString().padLeft(4, '0')}/${rankedWords.length}] ${word.toString()}',
);
await findEntry(word, db, useOverrides: useOverrides);
} catch (e) {
print(e);
@@ -78,16 +81,19 @@ Future<void> resolveExisting(
print('Statistics:');
for (final jlptLevel in ['N5', 'N4', 'N3', 'N2', 'N1']) {
final missingWordCount =
missingWords.where((e) => e.jlptLevel == jlptLevel).length;
final totalWordCount =
rankedWords.where((e) => e.jlptLevel == jlptLevel).length;
final missingWordCount = missingWords
.where((e) => e.jlptLevel == jlptLevel)
.length;
final totalWordCount = rankedWords
.where((e) => e.jlptLevel == jlptLevel)
.length;
final failureRate =
((missingWordCount / totalWordCount) * 100).toStringAsFixed(2);
final failureRate = ((missingWordCount / totalWordCount) * 100)
.toStringAsFixed(2);
print(
'${jlptLevel} failures: [${missingWordCount}/${totalWordCount}] (${failureRate}%)');
'$jlptLevel failures: [$missingWordCount/$totalWordCount] ($failureRate%)',
);
}
print('Not able to determine the entry for ${missingWords.length} words');

View File

@@ -0,0 +1,48 @@
// import 'dart:io';
import 'package:args/command_runner.dart';
// import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/cli/args.dart';
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
class Lemmatize extends Command {
@override
final name = 'lemmatize';
@override
final description = 'Lemmatize a word using the Jadb lemmatizer';
Lemmatize() {
addLibsqliteArg(argParser);
addJadbArg(argParser);
argParser.addOption(
'word',
abbr: 'w',
help: 'The word to search for.',
valueHelp: 'WORD',
);
}
@override
Future<void> run() async {
// if (argResults!.option('libsqlite') == null ||
// argResults!.option('jadb') == null) {
// print(argParser.usage);
// exit(64);
// }
// final db = await openLocalDb(
// jadbPath: argResults!.option('jadb')!,
// libsqlitePath: argResults!.option('libsqlite')!,
// );
final String searchWord = argResults!.option('word') ?? '食べたくない';
final time = Stopwatch()..start();
final result = lemmatize(searchWord);
time.stop();
print(result.toString());
print('Lemmatization took ${time.elapsedMilliseconds}ms');
}
}

View File

@@ -1,22 +1,25 @@
import 'dart:convert';
import 'dart:io';
import 'package:args/command_runner.dart';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/cli/args.dart';
import 'package:jadb/search.dart';
import 'package:args/command_runner.dart';
class QueryKanji extends Command {
final name = "query-kanji";
final description = "Query the database for kanji data";
@override
final name = 'query-kanji';
@override
final description = 'Query the database for kanji data';
@override
final invocation = 'jadb query-kanji [options] <kanji>';
QueryKanji() {
addLibsqliteArg(argParser);
addJadbArg(argParser);
}
@override
Future<void> run() async {
if (argResults!.option('libsqlite') == null ||
argResults!.option('jadb') == null) {
@@ -29,12 +32,25 @@ class QueryKanji extends Command {
libsqlitePath: argResults!.option('libsqlite')!,
);
final result = await JaDBConnection(db).searchKanji('');
if (argResults!.rest.length != 1) {
print('You need to provide exactly one kanji character to search for.');
print('');
printUsage();
exit(64);
}
final String kanji = argResults!.rest.first.trim();
final time = Stopwatch()..start();
final result = await JaDBConnection(db).jadbSearchKanji(kanji);
time.stop();
if (result == null) {
print("No such kanji");
print('No such kanji');
} else {
print(JsonEncoder.withIndent(' ').convert(result.toJson()));
}
print('Query took ${time.elapsedMilliseconds}ms');
}
}

View File

@@ -1,26 +1,38 @@
import 'dart:convert';
import 'dart:io';
import 'package:args/command_runner.dart';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/cli/args.dart';
import 'package:jadb/search.dart';
import 'package:args/command_runner.dart';
import 'package:sqflite_common/sqflite.dart';
class QueryWord extends Command {
final name = "query-word";
final description = "Query the database for word data";
@override
final name = 'query-word';
@override
final description = 'Query the database for word data';
@override
final invocation = 'jadb query-word [options] (<word> | <ID>)';
QueryWord() {
addLibsqliteArg(argParser);
addJadbArg(argParser);
argParser.addFlag('json', abbr: 'j', help: 'Output results in JSON format');
argParser.addOption('page', abbr: 'p', valueHelp: 'NUM', defaultsTo: '0');
argParser.addOption('pageSize', valueHelp: 'NUM', defaultsTo: '30');
}
@override
Future<void> run() async {
if (argResults!.option('libsqlite') == null ||
argResults!.option('jadb') == null) {
print(argParser.usage);
print('You need to provide both libsqlite and jadb paths.');
print('');
printUsage();
exit(64);
}
@@ -29,15 +41,81 @@ class QueryWord extends Command {
libsqlitePath: argResults!.option('libsqlite')!,
);
final result = await JaDBConnection(db).searchWord('kana');
if (argResults!.rest.isEmpty) {
print('You need to provide a word or ID to search for.');
print('');
printUsage();
exit(64);
}
if (result == null) {
print("Invalid search");
} else if (result.isEmpty) {
print("No matches");
final String searchWord = argResults!.rest.join(' ');
final int? maybeId = int.tryParse(searchWord);
if (maybeId != null && maybeId >= 1000000) {
await _searchId(db, maybeId, argResults!.flag('json'));
} else {
print(JsonEncoder.withIndent(' ')
.convert(result.map((e) => e.toJson()).toList()));
await _searchWord(
db,
searchWord,
argResults!.flag('json'),
int.parse(argResults!.option('page')!),
int.parse(argResults!.option('pageSize')!),
);
}
}
Future<void> _searchId(DatabaseExecutor db, int id, bool jsonOutput) async {
final time = Stopwatch()..start();
final result = await JaDBConnection(db).jadbGetWordById(id);
time.stop();
if (result == null) {
print('Invalid ID');
} else {
if (jsonOutput) {
print(JsonEncoder.withIndent(' ').convert(result));
} else {
print(result.toString());
}
}
print('Query took ${time.elapsedMilliseconds}ms');
}
Future<void> _searchWord(
DatabaseExecutor db,
String searchWord,
bool jsonOutput,
int page,
int pageSize,
) async {
final time = Stopwatch()..start();
final count = await JaDBConnection(db).jadbSearchWordCount(searchWord);
time.stop();
final time2 = Stopwatch()..start();
final result = await JaDBConnection(
db,
).jadbSearchWord(searchWord, page: page, pageSize: pageSize);
time2.stop();
if (result == null) {
print('Invalid search');
} else if (result.isEmpty) {
print('No matches');
} else {
if (jsonOutput) {
print(JsonEncoder.withIndent(' ').convert(result));
} else {
for (final e in result) {
print(e.toString());
print('');
}
}
}
print('Total count: $count');
print('Count query took ${time.elapsedMilliseconds}ms');
print('Query took ${time2.elapsedMilliseconds}ms');
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,217 @@
const Map<int, List<String>> RADICALS = {
1: ['', '', '', '', '', ''],
2: [
'',
'',
'',
'',
'𠆢',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'𠂉',
],
3: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'广',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
],
4: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
],
5: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
],
6: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'西',
],
7: [
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
],
8: ['', '', '', '', '', '', '', '', '', '', '', ''],
9: ['', '', '', '', '', '', '', '', '', '', ''],
10: ['', '', '', '', '', '', '', '', '', ''],
11: ['', '', '', '鹿', '', '', '', '', ''],
12: ['', '', '', ''],
13: ['', '', '', ''],
14: ['', ''],
17: [''],
};

View File

@@ -1 +1,56 @@
enum JlptLevel { none, n5, n4, n3, n2, n1 }
/// A JLPT (Japanese-Language Proficiency Test) difficulty level.
///
/// [none] is declared first so that [compareTo] — which relies on
/// declaration order — sorts unranked entries before N1, and N1 before N5.
enum JlptLevel implements Comparable<JlptLevel> {
  none,
  n1,
  n2,
  n3,
  n4,
  n5;

  /// Parses a case-insensitive level name such as 'n3' or 'N3'.
  ///
  /// Unknown or null input maps to [none].
  factory JlptLevel.fromString(String? level) => switch (level?.toUpperCase()) {
        'N1' => JlptLevel.n1,
        'N2' => JlptLevel.n2,
        'N3' => JlptLevel.n3,
        'N4' => JlptLevel.n4,
        'N5' => JlptLevel.n5,
        _ => JlptLevel.none,
      };

  /// The canonical level name ('N1'..'N5'), or null for [none].
  String? toNullableString() => switch (this) {
        JlptLevel.n1 => 'N1',
        JlptLevel.n2 => 'N2',
        JlptLevel.n3 => 'N3',
        JlptLevel.n4 => 'N4',
        JlptLevel.n5 => 'N5',
        JlptLevel.none => null,
      };

  /// The numeric level (1 for N1 through 5 for N5), or null for [none].
  int? get asInt => this == JlptLevel.none ? null : index;

  @override
  String toString() => toNullableString() ?? 'N/A';

  /// Serializes to the nullable level name.
  Object? toJson() => toNullableString();

  /// Deserializes from a JSON string (or null).
  factory JlptLevel.fromJson(Object? json) =>
      JlptLevel.fromString(json as String?);

  @override
  int compareTo(JlptLevel other) => index - other.index;
}

View File

@@ -0,0 +1,27 @@
import 'dart:io';
import 'dart:isolate';
import 'package:path/path.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// The filesystem path to the bundled SQL migration directory.
///
/// Resolved one level above the jadb package's lib root via the current
/// isolate's package configuration.
String migrationDirPath() {
  final packageRoot = Isolate.resolvePackageUriSync(Uri.parse('package:jadb/'));
  return packageRoot!.resolve('../migrations').toFilePath();
}
/// Initializes an empty database schema on [db].
///
/// Executes every `.sql` file found in [migrationDirPath], sorted by
/// basename so migrations run in filename order.
Future<void> createEmptyDb(DatabaseExecutor db) async {
  final migrations = Directory(migrationDirPath())
      .listSync()
      .whereType<File>()
      .where((f) => f.path.endsWith('.sql'))
      .toList()
    ..sort((a, b) => basename(a.path).compareTo(basename(b.path)));
  for (final migration in migrations) {
    await db.execute(await migration.readAsString());
  }
}

View File

@@ -19,21 +19,14 @@ enum JMdictDialect {
final String id;
final String description;
const JMdictDialect({
required this.id,
required this.description,
});
const JMdictDialect({required this.id, required this.description});
static JMdictDialect fromId(String id) =>
JMdictDialect.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
static JMdictDialect fromId(String id) => JMdictDialect.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
Map<String, Object?> toJson() => {
'id': id,
'description': description,
};
Map<String, Object?> toJson() => {'id': id, 'description': description};
static JMdictDialect fromJson(Map<String, Object?> json) =>
JMdictDialect.values.firstWhere(

View File

@@ -102,21 +102,14 @@ enum JMdictField {
final String id;
final String description;
const JMdictField({
required this.id,
required this.description,
});
const JMdictField({required this.id, required this.description});
static JMdictField fromId(String id) =>
JMdictField.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
static JMdictField fromId(String id) => JMdictField.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
Map<String, Object?> toJson() => {
'id': id,
'description': description,
};
Map<String, Object?> toJson() => {'id': id, 'description': description};
static JMdictField fromJson(Map<String, Object?> json) =>
JMdictField.values.firstWhere(

View File

@@ -13,21 +13,14 @@ enum JMdictKanjiInfo {
final String id;
final String description;
const JMdictKanjiInfo({
required this.id,
required this.description,
});
const JMdictKanjiInfo({required this.id, required this.description});
static JMdictKanjiInfo fromId(String id) =>
JMdictKanjiInfo.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
static JMdictKanjiInfo fromId(String id) => JMdictKanjiInfo.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
Map<String, Object?> toJson() => {
'id': id,
'description': description,
};
Map<String, Object?> toJson() => {'id': id, 'description': description};
static JMdictKanjiInfo fromJson(Map<String, Object?> json) =>
JMdictKanjiInfo.values.firstWhere(

View File

@@ -74,21 +74,14 @@ enum JMdictMisc {
final String id;
final String description;
const JMdictMisc({
required this.id,
required this.description,
});
const JMdictMisc({required this.id, required this.description});
static JMdictMisc fromId(String id) =>
JMdictMisc.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
static JMdictMisc fromId(String id) => JMdictMisc.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
Map<String, Object?> toJson() => {
'id': id,
'description': description,
};
Map<String, Object?> toJson() => {'id': id, 'description': description};
static JMdictMisc fromJson(Map<String, Object?> json) =>
JMdictMisc.values.firstWhere(

View File

@@ -7,14 +7,21 @@ enum JMdictPOS {
adjIx(id: 'adj-ix', description: 'adjective (keiyoushi) - yoi/ii class'),
adjKari(id: 'adj-kari', description: '\'kari\' adjective (archaic)'),
adjKu(id: 'adj-ku', description: '\'ku\' adjective (archaic)'),
adjNa(id: 'adj-na', description: 'adjectival nouns or quasi-adjectives (keiyodoshi)'),
adjNa(
id: 'adj-na',
description: 'adjectival nouns or quasi-adjectives (keiyodoshi)',
),
adjNari(id: 'adj-nari', description: 'archaic/formal form of na-adjective'),
adjNo(id: 'adj-no', description: 'nouns which may take the genitive case particle ''no'''),
adjNo(
id: 'adj-no',
description: 'nouns which may take the genitive case particle \'no\'',
shortDescription: 'Na-adjective (keiyodoshi)',
),
adjPn(id: 'adj-pn', description: 'pre-noun adjectival (rentaishi)'),
adjShiku(id: 'adj-shiku', description: '\'shiku\' adjective (archaic)'),
adjT(id: 'adj-t', description: '\'taru\' adjective'),
adv(id: 'adv', description: 'adverb (fukushi)'),
advTo(id: 'adv-to', description: 'adverb taking the ''to'' particle'),
advTo(id: 'adv-to', description: 'adverb taking the \'to\' particle'),
aux(id: 'aux', description: 'auxiliary'),
auxAdj(id: 'aux-adj', description: 'auxiliary adjective'),
auxV(id: 'aux-v', description: 'auxiliary verb'),
@@ -23,7 +30,11 @@ enum JMdictPOS {
ctr(id: 'ctr', description: 'counter'),
exp(id: 'exp', description: 'expressions (phrases, clauses, etc.)'),
int(id: 'int', description: 'interjection (kandoushi)'),
n(id: 'n', description: 'noun (common) (futsuumeishi)'),
n(
id: 'n',
description: 'noun (common) (futsuumeishi)',
shortDescription: 'noun',
),
nAdv(id: 'n-adv', description: 'adverbial noun (fukushitekimeishi)'),
nPr(id: 'n-pr', description: 'proper noun'),
nPref(id: 'n-pref', description: 'noun, used as a prefix'),
@@ -38,81 +49,164 @@ enum JMdictPOS {
vUnspec(id: 'v-unspec', description: 'verb unspecified'),
v1(id: 'v1', description: 'Ichidan verb'),
v1S(id: 'v1-s', description: 'Ichidan verb - kureru special class'),
v2aS(id: 'v2a-s', description: 'Nidan verb with ''u'' ending (archaic)'),
v2bK(id: 'v2b-k', description: 'Nidan verb (upper class) with ''bu'' ending (archaic)'),
v2bS(id: 'v2b-s', description: 'Nidan verb (lower class) with ''bu'' ending (archaic)'),
v2dK(id: 'v2d-k', description: 'Nidan verb (upper class) with ''dzu'' ending (archaic)'),
v2dS(id: 'v2d-s', description: 'Nidan verb (lower class) with ''dzu'' ending (archaic)'),
v2gK(id: 'v2g-k', description: 'Nidan verb (upper class) with ''gu'' ending (archaic)'),
v2gS(id: 'v2g-s', description: 'Nidan verb (lower class) with ''gu'' ending (archaic)'),
v2hK(id: 'v2h-k', description: 'Nidan verb (upper class) with ''hu/fu'' ending (archaic)'),
v2hS(id: 'v2h-s', description: 'Nidan verb (lower class) with ''hu/fu'' ending (archaic)'),
v2kK(id: 'v2k-k', description: 'Nidan verb (upper class) with ''ku'' ending (archaic)'),
v2kS(id: 'v2k-s', description: 'Nidan verb (lower class) with ''ku'' ending (archaic)'),
v2mK(id: 'v2m-k', description: 'Nidan verb (upper class) with ''mu'' ending (archaic)'),
v2mS(id: 'v2m-s', description: 'Nidan verb (lower class) with ''mu'' ending (archaic)'),
v2nS(id: 'v2n-s', description: 'Nidan verb (lower class) with ''nu'' ending (archaic)'),
v2rK(id: 'v2r-k', description: 'Nidan verb (upper class) with ''ru'' ending (archaic)'),
v2rS(id: 'v2r-s', description: 'Nidan verb (lower class) with ''ru'' ending (archaic)'),
v2sS(id: 'v2s-s', description: 'Nidan verb (lower class) with ''su'' ending (archaic)'),
v2tK(id: 'v2t-k', description: 'Nidan verb (upper class) with ''tsu'' ending (archaic)'),
v2tS(id: 'v2t-s', description: 'Nidan verb (lower class) with ''tsu'' ending (archaic)'),
v2wS(id: 'v2w-s', description: 'Nidan verb (lower class) with ''u'' ending and ''we'' conjugation (archaic)'),
v2yK(id: 'v2y-k', description: 'Nidan verb (upper class) with ''yu'' ending (archaic)'),
v2yS(id: 'v2y-s', description: 'Nidan verb (lower class) with ''yu'' ending (archaic)'),
v2zS(id: 'v2z-s', description: 'Nidan verb (lower class) with ''zu'' ending (archaic)'),
v4b(id: 'v4b', description: 'Yodan verb with ''bu'' ending (archaic)'),
v4g(id: 'v4g', description: 'Yodan verb with ''gu'' ending (archaic)'),
v4h(id: 'v4h', description: 'Yodan verb with ''hu/fu'' ending (archaic)'),
v4k(id: 'v4k', description: 'Yodan verb with ''ku'' ending (archaic)'),
v4m(id: 'v4m', description: 'Yodan verb with ''mu'' ending (archaic)'),
v4n(id: 'v4n', description: 'Yodan verb with ''nu'' ending (archaic)'),
v4r(id: 'v4r', description: 'Yodan verb with ''ru'' ending (archaic)'),
v4s(id: 'v4s', description: 'Yodan verb with ''su'' ending (archaic)'),
v4t(id: 'v4t', description: 'Yodan verb with ''tsu'' ending (archaic)'),
v2aS(id: 'v2a-s', description: 'Nidan verb with \'u\' ending (archaic)'),
v2bK(
id: 'v2b-k',
description: 'Nidan verb (upper class) with \'bu\' ending (archaic)',
),
v2bS(
id: 'v2b-s',
description: 'Nidan verb (lower class) with \'bu\' ending (archaic)',
),
v2dK(
id: 'v2d-k',
description: 'Nidan verb (upper class) with \'dzu\' ending (archaic)',
),
v2dS(
id: 'v2d-s',
description: 'Nidan verb (lower class) with \'dzu\' ending (archaic)',
),
v2gK(
id: 'v2g-k',
description: 'Nidan verb (upper class) with \'gu\' ending (archaic)',
),
v2gS(
id: 'v2g-s',
description: 'Nidan verb (lower class) with \'gu\' ending (archaic)',
),
v2hK(
id: 'v2h-k',
description: 'Nidan verb (upper class) with \'hu/fu\' ending (archaic)',
),
v2hS(
id: 'v2h-s',
description: 'Nidan verb (lower class) with \'hu/fu\' ending (archaic)',
),
v2kK(
id: 'v2k-k',
description: 'Nidan verb (upper class) with \'ku\' ending (archaic)',
),
v2kS(
id: 'v2k-s',
description: 'Nidan verb (lower class) with \'ku\' ending (archaic)',
),
v2mK(
id: 'v2m-k',
description: 'Nidan verb (upper class) with \'mu\' ending (archaic)',
),
v2mS(
id: 'v2m-s',
description: 'Nidan verb (lower class) with \'mu\' ending (archaic)',
),
v2nS(
id: 'v2n-s',
description: 'Nidan verb (lower class) with \'nu\' ending (archaic)',
),
v2rK(
id: 'v2r-k',
description: 'Nidan verb (upper class) with \'ru\' ending (archaic)',
),
v2rS(
id: 'v2r-s',
description: 'Nidan verb (lower class) with \'ru\' ending (archaic)',
),
v2sS(
id: 'v2s-s',
description: 'Nidan verb (lower class) with \'su\' ending (archaic)',
),
v2tK(
id: 'v2t-k',
description: 'Nidan verb (upper class) with \'tsu\' ending (archaic)',
),
v2tS(
id: 'v2t-s',
description: 'Nidan verb (lower class) with \'tsu\' ending (archaic)',
),
v2wS(
id: 'v2w-s',
description:
'Nidan verb (lower class) with \'u\' ending and \'we\' conjugation (archaic)',
),
v2yK(
id: 'v2y-k',
description: 'Nidan verb (upper class) with \'yu\' ending (archaic)',
),
v2yS(
id: 'v2y-s',
description: 'Nidan verb (lower class) with \'yu\' ending (archaic)',
),
v2zS(
id: 'v2z-s',
description: 'Nidan verb (lower class) with \'zu\' ending (archaic)',
),
v4b(id: 'v4b', description: 'Yodan verb with \'bu\' ending (archaic)'),
v4g(id: 'v4g', description: 'Yodan verb with \'gu\' ending (archaic)'),
v4h(id: 'v4h', description: 'Yodan verb with \'hu/fu\' ending (archaic)'),
v4k(id: 'v4k', description: 'Yodan verb with \'ku\' ending (archaic)'),
v4m(id: 'v4m', description: 'Yodan verb with \'mu\' ending (archaic)'),
v4n(id: 'v4n', description: 'Yodan verb with \'nu\' ending (archaic)'),
v4r(id: 'v4r', description: 'Yodan verb with \'ru\' ending (archaic)'),
v4s(id: 'v4s', description: 'Yodan verb with \'su\' ending (archaic)'),
v4t(id: 'v4t', description: 'Yodan verb with \'tsu\' ending (archaic)'),
v5aru(id: 'v5aru', description: 'Godan verb - -aru special class'),
v5b(id: 'v5b', description: 'Godan verb with ''bu'' ending'),
v5g(id: 'v5g', description: 'Godan verb with ''gu'' ending'),
v5k(id: 'v5k', description: 'Godan verb with ''ku'' ending'),
v5b(id: 'v5b', description: 'Godan verb with \'bu\' ending'),
v5g(id: 'v5g', description: 'Godan verb with \'gu\' ending'),
v5k(id: 'v5k', description: 'Godan verb with \'ku\' ending'),
v5kS(id: 'v5k-s', description: 'Godan verb - Iku/Yuku special class'),
v5m(id: 'v5m', description: 'Godan verb with ''mu'' ending'),
v5n(id: 'v5n', description: 'Godan verb with ''nu'' ending'),
v5r(id: 'v5r', description: 'Godan verb with ''ru'' ending'),
v5rI(id: 'v5r-i', description: 'Godan verb with ''ru'' ending (irregular verb)'),
v5s(id: 'v5s', description: 'Godan verb with ''su'' ending'),
v5t(id: 'v5t', description: 'Godan verb with ''tsu'' ending'),
v5u(id: 'v5u', description: 'Godan verb with ''u'' ending'),
v5uS(id: 'v5u-s', description: 'Godan verb with ''u'' ending (special class)'),
v5uru(id: 'v5uru', description: 'Godan verb - Uru old class verb (old form of Eru)'),
v5m(id: 'v5m', description: 'Godan verb with \'mu\' ending'),
v5n(id: 'v5n', description: 'Godan verb with \'nu\' ending'),
v5r(id: 'v5r', description: 'Godan verb with \'ru\' ending'),
v5rI(
id: 'v5r-i',
description: 'Godan verb with \'ru\' ending (irregular verb)',
),
v5s(id: 'v5s', description: 'Godan verb with \'su\' ending'),
v5t(id: 'v5t', description: 'Godan verb with \'tsu\' ending'),
v5u(id: 'v5u', description: 'Godan verb with \'u\' ending'),
v5uS(
id: 'v5u-s',
description: 'Godan verb with \'u\' ending (special class)',
),
v5uru(
id: 'v5uru',
description: 'Godan verb - Uru old class verb (old form of Eru)',
),
vi(id: 'vi', description: 'intransitive verb'),
vk(id: 'vk', description: 'Kuru verb - special class'),
vn(id: 'vn', description: 'irregular nu verb'),
vr(id: 'vr', description: 'irregular ru verb, plain form ends with -ri'),
vs(id: 'vs', description: 'noun or participle which takes the aux. verb suru'),
vsC(id: 'vs-c', description: 'suru verb - precursor to the modern suru'),
vs(
id: 'vs',
description: 'noun or participle which takes the aux. verb suru',
shortDescription: 'suru verb',
),
vsC(id: 'vs-c', description: 'su verb - precursor to the modern suru'),
vsI(id: 'vs-i', description: 'suru verb - included'),
vsS(id: 'vs-s', description: 'suru verb - special class'),
vt(id: 'vt', description: 'transitive verb'),
vz(id: 'vz', description: 'Ichidan verb - zuru verb (alternative form of -jiru verbs)');
vz(
id: 'vz',
description: 'Ichidan verb - zuru verb (alternative form of -jiru verbs)',
);
final String id;
final String description;
final String? _shortDescription;
const JMdictPOS({
required this.id,
required this.description,
});
String? shortDescription,
}) : _shortDescription = shortDescription;
static JMdictPOS fromId(String id) =>
JMdictPOS.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
String get shortDescription => _shortDescription ?? description;
Map<String, Object?> toJson() => {
'id': id,
'description': description,
};
static JMdictPOS fromId(String id) => JMdictPOS.values.firstWhere(
(e) => e.id == id,
orElse: () => throw Exception('Unknown id: $id'),
);
Map<String, Object?> toJson() => {'id': id, 'description': description};
static JMdictPOS fromJson(Map<String, Object?> json) =>
JMdictPOS.values.firstWhere(

View File

@@ -15,10 +15,7 @@ enum JMdictReadingInfo {
final String id;
final String description;
const JMdictReadingInfo({
required this.id,
required this.description,
});
const JMdictReadingInfo({required this.id, required this.description});
static JMdictReadingInfo fromId(String id) =>
JMdictReadingInfo.values.firstWhere(
@@ -26,10 +23,7 @@ enum JMdictReadingInfo {
orElse: () => throw Exception('Unknown id: $id'),
);
Map<String, Object?> toJson() => {
'id': id,
'description': description,
};
Map<String, Object?> toJson() => {'id': id, 'description': description};
static JMdictReadingInfo fromJson(Map<String, Object?> json) =>
JMdictReadingInfo.values.firstWhere(

View File

@@ -4,37 +4,43 @@ class KanjiSearchRadical extends Equatable {
/// The radical symbol.
final String symbol;
/// The names of this radical.
///
/// Each name might refer to a specific form of the radical.
final List<String> names;
/// The radical forms used in this kanji.
///
/// (e.g. "亻" for "人", "氵" for "水")
final List<String> forms;
/// The meaning of the radical.
final String meaning;
/// The meanings of the radical.
final List<String> meanings;
// ignore: public_member_api_docs
const KanjiSearchRadical({
required this.symbol,
this.forms = const [],
required this.meaning,
required this.names,
required this.forms,
required this.meanings,
});
@override
List<Object> get props => [
symbol,
forms,
meaning,
];
List<Object> get props => [symbol, names, forms, meanings];
Map<String, dynamic> toJson() => {
'symbol': symbol,
'forms': forms,
'meaning': meaning,
};
'symbol': symbol,
'names': names,
'forms': forms,
'meanings': meanings,
};
factory KanjiSearchRadical.fromJson(Map<String, dynamic> json) {
return KanjiSearchRadical(
symbol: json['symbol'] as String,
names: (json['names'] as List).map((e) => e as String).toList(),
forms: (json['forms'] as List).map((e) => e as String).toList(),
meaning: json['meaning'] as String,
meanings: (json['meanings'] as List).map((e) => e as String).toList(),
);
}
}

View File

@@ -7,7 +7,8 @@ class KanjiSearchResult extends Equatable {
final String kanji;
/// The school level that the kanji is taught in, if applicable.
final String? taughtIn;
/// Ranges from `1` to `10` (except 7)
final int? taughtIn;
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
///
@@ -38,64 +39,101 @@ class KanjiSearchResult extends Equatable {
/// Information about this character's radical, if applicable.
final KanjiSearchRadical? radical;
// TODO: document more accurately what kind of parts?
/// The parts used in this kanji.
/// All radicals/kanji parts that make up this kanji.
///
/// Note that this list might not always be complete.
final List<String> parts;
/// Ids for the kanji's symbol in different encoding systems
/// (e.g. JIS213, JIS208, UCS, etc.)
final Map<String, String> codepoints;
/// The kanji's nanori readings.
///
/// Nanori readings are special readings of kanji used in names.
final List<String> nanori;
/// How to read this kanji in different languages.
final Map<String, List<String>> alternativeLanguageReadings;
/// Common miscounts of the kanji's strokes.
final List<int> strokeMiscounts;
/// Query codes for looking up this kanji in different indexing systems.
final Map<String, List<String>> queryCodes;
/// References to other dictionaries that contain this kanji.
final Map<String, String> dictionaryReferences;
const KanjiSearchResult({
required this.kanji,
this.taughtIn,
this.jlptLevel,
this.newspaperFrequencyRank,
required this.taughtIn,
required this.jlptLevel,
required this.newspaperFrequencyRank,
required this.strokeCount,
required this.meanings,
this.kunyomi = const [],
this.onyomi = const [],
required this.kunyomi,
required this.onyomi,
// this.kunyomiExamples = const [],
// this.onyomiExamples = const [],
this.radical,
this.parts = const [],
required this.radical,
required this.parts,
required this.codepoints,
required this.nanori,
required this.alternativeLanguageReadings,
required this.strokeMiscounts,
required this.queryCodes,
required this.dictionaryReferences,
});
@override
// ignore: public_member_api_docs
List<Object?> get props => [
taughtIn,
jlptLevel,
newspaperFrequencyRank,
strokeCount,
meanings,
kunyomi,
onyomi,
// kunyomiExamples,
// onyomiExamples,
radical,
parts,
];
taughtIn,
jlptLevel,
newspaperFrequencyRank,
strokeCount,
meanings,
kunyomi,
onyomi,
// kunyomiExamples,
// onyomiExamples,
radical,
parts,
codepoints,
kanji,
nanori,
alternativeLanguageReadings,
strokeMiscounts,
queryCodes,
dictionaryReferences,
];
Map<String, dynamic> toJson() => {
'kanji': kanji,
'taughtIn': taughtIn,
'jlptLevel': jlptLevel,
'newspaperFrequencyRank': newspaperFrequencyRank,
'strokeCount': strokeCount,
'meanings': meanings,
'kunyomi': kunyomi,
'onyomi': onyomi,
// 'onyomiExamples': onyomiExamples,
// 'kunyomiExamples': kunyomiExamples,
'radical': radical?.toJson(),
'parts': parts,
// 'strokeOrderDiagramUri': strokeOrderDiagramUri,
// 'strokeOrderSvgUri': strokeOrderSvgUri,
// 'strokeOrderGifUri': strokeOrderGifUri,
// 'uri': uri,
};
'kanji': kanji,
'taughtIn': taughtIn,
'jlptLevel': jlptLevel,
'newspaperFrequencyRank': newspaperFrequencyRank,
'strokeCount': strokeCount,
'meanings': meanings,
'kunyomi': kunyomi,
'onyomi': onyomi,
// 'onyomiExamples': onyomiExamples,
// 'kunyomiExamples': kunyomiExamples,
'radical': radical?.toJson(),
'parts': parts,
'codepoints': codepoints,
'nanori': nanori,
'alternativeLanguageReadings': alternativeLanguageReadings,
'strokeMiscounts': strokeMiscounts,
'queryCodes': queryCodes,
'dictionaryReferences': dictionaryReferences,
};
factory KanjiSearchResult.fromJson(Map<String, dynamic> json) {
return KanjiSearchResult(
kanji: json['kanji'] as String,
taughtIn: json['taughtIn'] as String?,
taughtIn: json['taughtIn'] as int?,
jlptLevel: json['jlptLevel'] as String?,
newspaperFrequencyRank: json['newspaperFrequencyRank'] as int?,
strokeCount: json['strokeCount'] as int,
@@ -112,6 +150,26 @@ class KanjiSearchResult extends Equatable {
? KanjiSearchRadical.fromJson(json['radical'])
: null,
parts: (json['parts'] as List).map((e) => e as String).toList(),
codepoints: (json['codepoints'] as Map<String, dynamic>).map(
(key, value) => MapEntry(key, value as String),
),
nanori: (json['nanori'] as List).map((e) => e as String).toList(),
alternativeLanguageReadings:
(json['alternativeLanguageReadings'] as Map<String, dynamic>).map(
(key, value) =>
MapEntry(key, (value as List).map((e) => e as String).toList()),
),
strokeMiscounts: (json['strokeMiscounts'] as List)
.map((e) => e as int)
.toList(),
queryCodes: (json['queryCodes'] as Map<String, dynamic>).map(
(key, value) =>
MapEntry(key, (value as List).map((e) => e as String).toList()),
),
dictionaryReferences:
(json['dictionaryReferences'] as Map<String, dynamic>).map(
(key, value) => MapEntry(key, value as String),
),
);
}
}

View File

@@ -1,3 +0,0 @@
/// Placeholder for the result of a radical-based search.
///
/// NOTE(review): no fields or behavior yet — shape TBD when implemented.
class RadicalsSearchResult {
  // TODO: implement me
}

View File

@@ -0,0 +1,41 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// Verifies that [db] contains every table this package expects.
///
/// Reads all table names from sqlite_master and compares them against the
/// union of the JMdict, KANJIDIC, RADKFILE and Tanos-JLPT table-name sets.
/// Throws an [Exception] listing both the missing and the found tables
/// when any expected table is absent.
Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
  final rows = await db.query(
    'sqlite_master',
    columns: ['name'],
    where: 'type = ?',
    whereArgs: ['table'],
  );
  final Set<String> tables = {for (final row in rows) row['name'] as String};
  final Set<String> expectedTables = {
    ...JMdictTableNames.allTables,
    ...KANJIDICTableNames.allTables,
    ...RADKFILETableNames.allTables,
    ...TanosJLPTTableNames.allTables,
  };
  final missingTables = expectedTables.difference(tables);
  if (missingTables.isEmpty) return;
  throw Exception(
    [
      'Missing tables:',
      missingTables.map((table) => ' - $table').join('\n'),
      '',
      'Found tables:\n',
      tables.map((table) => ' - $table').join('\n'),
      '',
      'Please ensure the database is correctly set up.',
    ].join('\n'),
  );
}

View File

@@ -1,3 +1,4 @@
import 'package:jadb/models/common/jlpt_level.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
@@ -6,9 +7,15 @@ import 'package:jadb/models/word_search/word_search_sources.dart';
/// A class representing a single dictionary entry from a word search.
class WordSearchResult {
/// The score of the entry, used for sorting results.
final int score;
/// The ID of the entry in the database.
final int entryId;
/// Whether the word is common or not.
final bool isCommon;
/// The variants of the word in Japanese.
final List<WordSearchRuby> japanese;
@@ -21,32 +28,43 @@ class WordSearchResult {
/// The meanings of the word, including parts of speech and other information.
final List<WordSearchSense> senses;
/// The JLPT level of the word.
final JlptLevel jlptLevel;
/// A class listing the sources used to make up the data for this word search result.
final WordSearchSources sources;
const WordSearchResult({
required this.score,
required this.entryId,
required this.isCommon,
required this.japanese,
required this.kanjiInfo,
required this.readingInfo,
required this.senses,
required this.jlptLevel,
required this.sources,
});
Map<String, dynamic> toJson() => {
'entryId': entryId,
'japanese': japanese.map((e) => e.toJson()).toList(),
'kanjiInfo':
kanjiInfo.map((key, value) => MapEntry(key, value.toJson())),
'readingInfo':
readingInfo.map((key, value) => MapEntry(key, value.toJson())),
'senses': senses.map((e) => e.toJson()).toList(),
'sources': sources.toJson(),
};
'_score': score,
'entryId': entryId,
'isCommon': isCommon,
'japanese': japanese.map((e) => e.toJson()).toList(),
'kanjiInfo': kanjiInfo.map((key, value) => MapEntry(key, value.toJson())),
'readingInfo': readingInfo.map(
(key, value) => MapEntry(key, value.toJson()),
),
'senses': senses.map((e) => e.toJson()).toList(),
'jlptLevel': jlptLevel.toJson(),
'sources': sources.toJson(),
};
factory WordSearchResult.fromJson(Map<String, dynamic> json) =>
WordSearchResult(
score: json['_score'] as int,
entryId: json['entryId'] as int,
isCommon: json['isCommon'] as bool,
japanese: (json['japanese'] as List<dynamic>)
.map((e) => WordSearchRuby.fromJson(e))
.toList(),
@@ -59,6 +77,24 @@ class WordSearchResult {
senses: (json['senses'] as List<dynamic>)
.map((e) => WordSearchSense.fromJson(e))
.toList(),
jlptLevel: JlptLevel.fromJson(json['jlptLevel'] as Object?),
sources: WordSearchSources.fromJson(json['sources']),
);
String _formatJapaneseWord(WordSearchRuby word) =>
word.furigana == null ? word.base : '${word.base} (${word.furigana})';
@override
String toString() {
final japaneseWord = _formatJapaneseWord(japanese[0]);
final isCommonString = isCommon ? '(C)' : '';
final jlptLevelString = '(${jlptLevel.toString()})';
return '''
$score | [$entryId] $japaneseWord $isCommonString $jlptLevelString
Other forms: ${japanese.skip(1).map(_formatJapaneseWord).join(', ')}
Senses: ${senses.map((s) => s.englishDefinitions).join(', ')}
'''
.trim();
}
}

View File

@@ -6,18 +6,12 @@ class WordSearchRuby {
/// Furigana, if applicable.
String? furigana;
WordSearchRuby({
required this.base,
this.furigana,
});
WordSearchRuby({required this.base, this.furigana});
Map<String, dynamic> toJson() => {
'base': base,
'furigana': furigana,
};
Map<String, dynamic> toJson() => {'base': base, 'furigana': furigana};
factory WordSearchRuby.fromJson(Map<String, dynamic> json) => WordSearchRuby(
base: json['base'] as String,
furigana: json['furigana'] as String?,
);
base: json['base'] as String,
furigana: json['furigana'] as String?,
);
}

View File

@@ -2,6 +2,7 @@ import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
class WordSearchSense {
@@ -38,7 +39,7 @@ class WordSearchSense {
// TODO: there is a lot more info to collect in the languageSource data
/// Information about the the origin of the word, if loaned from another language.
final List<String> languageSource;
final List<WordSearchSenseLanguageSource> languageSource;
// TODO: add example sentences
@@ -70,18 +71,18 @@ class WordSearchSense {
languageSource.isEmpty;
Map<String, dynamic> toJson() => {
'englishDefinitions': englishDefinitions,
'partsOfSpeech': partsOfSpeech.map((e) => e.toJson()).toList(),
'seeAlso': seeAlso.map((e) => e.toJson()).toList(),
'antonyms': antonyms.map((e) => e.toJson()).toList(),
'restrictedToReading': restrictedToReading,
'restrictedToKanji': restrictedToKanji,
'fields': fields.map((e) => e.toJson()).toList(),
'dialects': dialects.map((e) => e.toJson()).toList(),
'misc': misc.map((e) => e.toJson()).toList(),
'info': info,
'languageSource': languageSource,
};
'englishDefinitions': englishDefinitions,
'partsOfSpeech': partsOfSpeech.map((e) => e.toJson()).toList(),
'seeAlso': seeAlso.map((e) => e.toJson()).toList(),
'antonyms': antonyms.map((e) => e.toJson()).toList(),
'restrictedToReading': restrictedToReading,
'restrictedToKanji': restrictedToKanji,
'fields': fields.map((e) => e.toJson()).toList(),
'dialects': dialects.map((e) => e.toJson()).toList(),
'misc': misc.map((e) => e.toJson()).toList(),
'info': info,
'languageSource': languageSource,
};
factory WordSearchSense.fromJson(Map<String, dynamic> json) =>
WordSearchSense(
@@ -103,9 +104,12 @@ class WordSearchSense {
dialects: (json['dialects'] as List)
.map((e) => JMdictDialect.fromJson(e))
.toList(),
misc:
(json['misc'] as List).map((e) => JMdictMisc.fromJson(e)).toList(),
misc: (json['misc'] as List)
.map((e) => JMdictMisc.fromJson(e))
.toList(),
info: List<String>.from(json['info']),
languageSource: List<String>.from(json['languageSource']),
languageSource: (json['languageSource'] as List)
.map((e) => WordSearchSenseLanguageSource.fromJson(e))
.toList(),
);
}

View File

@@ -0,0 +1,30 @@
/// A reference to a foreign language where this sense originates from.
class WordSearchSenseLanguageSource {
  /// The source language of the loaned word.
  final String language;

  /// The phrase in the source language, if known.
  final String? phrase;

  /// Whether the source phrase fully describes this sense.
  final bool fullyDescribesSense;

  /// Whether the word was constructed from smaller words.
  final bool constructedFromSmallerWords;

  /// Creates a language source; booleans default to the common case
  /// (fully describing, not constructed).
  const WordSearchSenseLanguageSource({
    required this.language,
    this.phrase,
    this.fullyDescribesSense = true,
    this.constructedFromSmallerWords = false,
  });

  /// Serializes this language source to a JSON map.
  Map<String, Object?> toJson() => {
        'language': language,
        'phrase': phrase,
        'fullyDescribesSense': fullyDescribesSense,
        'constructedFromSmallerWords': constructedFromSmallerWords,
      };

  /// Deserializes a language source from [json].
  ///
  /// Explicit casts (matching the other fromJson factories in this package)
  /// fail fast on malformed input; missing booleans fall back to the
  /// constructor defaults.
  factory WordSearchSenseLanguageSource.fromJson(Map<String, dynamic> json) =>
      WordSearchSenseLanguageSource(
        language: json['language'] as String,
        phrase: json['phrase'] as String?,
        fullyDescribesSense: json['fullyDescribesSense'] as bool? ?? true,
        constructedFromSmallerWords:
            json['constructedFromSmallerWords'] as bool? ?? false,
      );
}

View File

@@ -7,20 +7,11 @@ class WordSearchSources {
/// Whether JMnedict was used.
final bool jmnedict;
const WordSearchSources({
this.jmdict = true,
this.jmnedict = false,
});
const WordSearchSources({this.jmdict = true, this.jmnedict = false});
Map<String, Object?> get sqlValue => {
'jmdict': jmdict,
'jmnedict': jmnedict,
};
Map<String, Object?> get sqlValue => {'jmdict': jmdict, 'jmnedict': jmnedict};
Map<String, dynamic> toJson() => {
'jmdict': jmdict,
'jmnedict': jmnedict,
};
Map<String, dynamic> toJson() => {'jmdict': jmdict, 'jmnedict': jmnedict};
factory WordSearchSources.fromJson(Map<String, dynamic> json) =>
WordSearchSources(

View File

@@ -1,25 +1,45 @@
import 'package:jadb/models/word_search/word_search_result.dart';
/// A cross-reference entry from one word-result to another entry.
class WordSearchXrefEntry {
/// The ID of the entry that this entry cross-references to.
final int entryId;
/// The base word of the cross-referenced entry.
final String baseWord;
/// The furigana of the cross-referenced entry, if any.
final String? furigana;
/// Whether the entryId was ambiguous during the creation of the
/// database (and hence might be incorrect).
final bool ambiguous;
/// The result of the cross-reference, may or may not be included in the query.
final WordSearchResult? xrefResult;
const WordSearchXrefEntry({
required this.entryId,
required this.ambiguous,
required this.baseWord,
required this.furigana,
required this.xrefResult,
});
Map<String, dynamic> toJson() => {
'entryId': entryId,
'ambiguous': ambiguous,
};
'entryId': entryId,
'ambiguous': ambiguous,
'baseWord': baseWord,
'furigana': furigana,
'xrefResult': xrefResult?.toJson(),
};
factory WordSearchXrefEntry.fromJson(Map<String, dynamic> json) =>
WordSearchXrefEntry(
entryId: json['entryId'] as int,
ambiguous: json['ambiguous'] as bool,
baseWord: json['baseWord'] as String,
furigana: json['furigana'] as String?,
xrefResult: null,
);
}

View File

@@ -1,25 +1,71 @@
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
import 'package:jadb/models/radkfile/radicals_search_result.dart';
import 'package:jadb/search/word_search.dart';
import 'package:jadb/models/verify_tables.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/search/filter_kanji.dart';
import 'package:jadb/search/kanji_search.dart';
import 'package:jadb/search/radical_search.dart';
import 'package:jadb/search/word_search/word_search.dart';
import 'package:sqflite_common/sqlite_api.dart';
class JaDBConnection {
final DatabaseExecutor _connection;
extension JaDBConnection on DatabaseExecutor {
/// Ensure that the database contain all JaDB tables.
///
/// This will throw an exception if any of the tables are missing.
Future<void> jadbVerifyTables() => verifyTablesWithDbConnection(this);
const JaDBConnection(this._connection);
/// Search for a kanji in the database.
Future<KanjiSearchResult?> jadbSearchKanji(String kanji) =>
searchKanjiWithDbConnection(this, kanji);
Future<KanjiSearchResult?> searchKanji(String kanji) async =>
searchKanjiWithDbConnection(this._connection, kanji);
/// Search for a kanji in the database.
Future<Map<String, KanjiSearchResult>> jadbGetManyKanji(Set<String> kanji) =>
searchManyKanjiWithDbConnection(this, kanji);
Future<RadicalsSearchResult> searchKanjiByRadicals(
List<String> radicals) async {
throw UnimplementedError();
}
/// Filter a list of characters, and return the ones that are listed in the kanji dictionary.
Future<List<String>> filterKanji(
List<String> kanji, {
bool deduplicate = false,
}) => filterKanjiWithDbConnection(this, kanji, deduplicate);
Future<List<WordSearchResult>?> searchWord(String word) async =>
searchWordWithDbConnection(this._connection, word);
/// Search for a word in the database.
Future<List<WordSearchResult>?> jadbSearchWord(
String word, {
SearchMode searchMode = SearchMode.Auto,
int page = 0,
int? pageSize,
}) => searchWordWithDbConnection(
this,
word,
searchMode: searchMode,
page: page,
pageSize: pageSize,
);
///
Future<WordSearchResult?> jadbGetWordById(int id) =>
getWordByIdWithDbConnection(this, id);
/// Get a list of words by their IDs.
///
/// IDs for which no result is found are omitted from the returned value.
Future<Map<int, WordSearchResult>> jadbGetManyWordsByIds(Set<int> ids) =>
getWordsByIdsWithDbConnection(this, ids);
/// Search for a word in the database, and return the count of results.
Future<int?> jadbSearchWordCount(
String word, {
SearchMode searchMode = SearchMode.Auto,
}) => searchWordCountWithDbConnection(this, word, searchMode: searchMode);
/// Given a list of radicals, search which kanji contains all
/// of the radicals, find their other radicals, and return those.
/// This is used to figure out which remaining combinations of radicals
/// the user can search for without getting zero results.
Future<List<String>> jadbSearchRemainingRadicals(List<String> radicals) =>
searchRemainingRadicalsWithDbConnection(this, radicals);
/// Given a list of radicals, search which kanji contains all
/// of the radicals, and return those.
Future<List<String>> jadbSearchKanjiByRadicals(List<String> radicals) =>
searchKanjiByRadicalsWithDbConnection(this, radicals);
}

View File

@@ -0,0 +1,22 @@
import 'package:jadb/table_names/kanjidic.dart';
import 'package:sqflite_common/sqflite.dart';
/// Filters [kanji] down to the characters present in the KANJIDIC
/// character table.
///
/// When [deduplicate] is `true`, each matching character is returned at
/// most once (in query-result order); otherwise the input order and
/// multiplicity are preserved, minus the characters not found in the
/// dictionary.
Future<List<String>> filterKanjiWithDbConnection(
  DatabaseExecutor connection,
  List<String> kanji,
  bool deduplicate,
) async {
  // Nothing to look up — skip the database round-trip entirely.
  if (kanji.isEmpty) {
    return [];
  }
  final placeholders = List.filled(kanji.length, '?').join(',');
  final rows = await connection.rawQuery('''
        SELECT "literal"
        FROM "${KANJIDICTableNames.character}"
        WHERE "literal" IN ($placeholders)
      ''', kanji);
  final Set<String> filteredKanji = rows
      .map((row) => row['literal'] as String)
      .toSet();
  if (deduplicate) {
    return filteredKanji.toList();
  }
  return kanji.where((k) => filteredKanji.contains(k)).toList();
}

View File

@@ -1,4 +1,9 @@
import 'package:collection/collection.dart';
import 'package:jadb/models/kanji_search/kanji_search_radical.dart';
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/util/romaji_transliteration.dart';
import 'package:sqflite_common/sqflite.dart';
Future<KanjiSearchResult?> searchKanjiWithDbConnection(
@@ -6,136 +11,173 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
String kanji,
) async {
late final List<Map<String, Object?>> characters;
final characters_query = connection.query(
"KANJIDIC_Character",
where: "KANJIDIC_Character.literal = ?",
final charactersQuery = connection.query(
KANJIDICTableNames.character,
where: 'literal = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> codepoints;
final codepoints_query = connection.query(
"KANJIDIC_Codepoint",
where: "KANJIDIC_Codepoint.kanji = ?",
final codepointsQuery = connection.query(
KANJIDICTableNames.codepoint,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> kunyomis;
final kunyomis_query = connection.query(
"KANJIDIC_Kunyomi",
where: "KANJIDIC_Kunyomi.kanji = ?",
final kunyomisQuery = connection.query(
KANJIDICTableNames.kunyomi,
where: 'kanji = ?',
whereArgs: [kanji],
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> onyomis;
final onyomis_query = connection.query(
"KANJIDIC_Onyomi",
where: "KANJIDIC_Onyomi.kanji = ?",
final onyomisQuery = connection.query(
KANJIDICTableNames.onyomi,
where: 'kanji = ?',
whereArgs: [kanji],
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> meanings;
final meanings_query = connection.query(
"KANJIDIC_Meaning",
where: "KANJIDIC_Meaning.kanji = ? AND KANJIDIC_Meaning.language = ?",
final meaningsQuery = connection.query(
KANJIDICTableNames.meaning,
where: 'kanji = ? AND language = ?',
whereArgs: [kanji, 'eng'],
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> nanoris;
final nanoris_query = connection.query(
"KANJIDIC_Nanori",
where: "KANJIDIC_Nanori.kanji = ?",
final nanorisQuery = connection.query(
KANJIDICTableNames.nanori,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> dictionary_references;
final dictionary_references_query = connection.query(
"KANJIDIC_DictionaryReference",
where: "KANJIDIC_DictionaryReference.kanji = ?",
late final List<Map<String, Object?>> dictionaryReferences;
final dictionaryReferencesQuery = connection.query(
KANJIDICTableNames.dictionaryReference,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> query_codes;
final query_codes_query = connection.query(
"KANJIDIC_QueryCode",
where: "KANJIDIC_QueryCode.kanji = ?",
late final List<Map<String, Object?>> queryCodes;
final queryCodesQuery = connection.query(
KANJIDICTableNames.queryCode,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> radicals;
final radicals_query = connection.query(
"KANJIDIC_Radical",
where: "KANJIDIC_Radical.kanji = ?",
whereArgs: [kanji],
final radicalsQuery = connection.rawQuery(
'''
SELECT DISTINCT
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
"names"
FROM "${KANJIDICTableNames.radical}"
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
LEFT JOIN (
SELECT "radicalId", group_concat("name") AS "names"
FROM "${KANJIDICTableNames.radicalName}"
GROUP BY "radicalId"
) USING ("radicalId")
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
''',
[kanji],
);
late final List<Map<String, Object?>> radical_names;
final radical_names_query = connection.query(
"KANJIDIC_RadicalName",
where: "KANJIDIC_RadicalName.kanji = ?",
late final List<Map<String, Object?>> parts;
final partsQuery = connection.query(
RADKFILETableNames.radkfile,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> readings;
final readings_query = connection.query(
"KANJIDIC_Reading",
where: "KANJIDIC_Reading.kanji = ?",
final readingsQuery = connection.query(
KANJIDICTableNames.reading,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> stroke_miscounts;
final stroke_miscounts_query = connection.query(
"KANJIDIC_StrokeMiscount",
where: "KANJIDIC_StrokeMiscount.kanji = ?",
late final List<Map<String, Object?>> strokeMiscounts;
final strokeMiscountsQuery = connection.query(
KANJIDICTableNames.strokeMiscount,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> variants;
final variants_query = connection.query(
"KANJIDIC_Variant",
where: "KANJIDIC_Variant.kanji = ?",
whereArgs: [kanji],
);
// TODO: add variant data to result
// late final List<Map<String, Object?>> variants;
// final variants_query = connection.query(
// KANJIDICTableNames.variant,
// where: "kanji = ?",
// whereArgs: [kanji],
// );
// TODO: Search for kunyomi and onyomi usage of the characters
// from JMDict. We'll need to fuzzy aquery JMDict_KanjiElement for mathces,
// filter JMdict_ReadingElement for kunyomi/onyomi, and then sort the main entry
// by JLPT, news frequency, etc.
// TODO: Search for kunyomi and onyomi usage of the characters
// from JMDict. We'll need to fuzzy query JMDict_KanjiElement for matches,
// filter JMdict_ReadingElement for kunyomi/onyomi, and then sort the main entry
// by JLPT, news frequency, etc.
await characters_query.then((value) => characters = value);
await charactersQuery.then((value) => characters = value);
if (characters.isEmpty) {
return null;
}
await Future.wait({
codepoints_query.then((value) => codepoints = value),
kunyomis_query.then((value) => kunyomis = value),
onyomis_query.then((value) => onyomis = value),
meanings_query.then((value) => meanings = value),
nanoris_query.then((value) => nanoris = value),
dictionary_references_query.then((value) => dictionary_references = value),
query_codes_query.then((value) => query_codes = value),
radicals_query.then((value) => radicals = value),
radical_names_query.then((value) => radical_names = value),
readings_query.then((value) => readings = value),
stroke_miscounts_query.then((value) => stroke_miscounts = value),
variants_query.then((value) => variants = value),
codepointsQuery.then((value) => codepoints = value),
kunyomisQuery.then((value) => kunyomis = value),
onyomisQuery.then((value) => onyomis = value),
meaningsQuery.then((value) => meanings = value),
nanorisQuery.then((value) => nanoris = value),
dictionaryReferencesQuery.then((value) => dictionaryReferences = value),
queryCodesQuery.then((value) => queryCodes = value),
radicalsQuery.then((value) => radicals = value),
partsQuery.then((value) => parts = value),
readingsQuery.then((value) => readings = value),
strokeMiscountsQuery.then((value) => strokeMiscounts = value),
// variants_query.then((value) => variants = value),
});
final entry = characters.first;
final String? grade = {
1: 'grade 1',
2: 'grade 2',
3: 'grade 3',
4: 'grade 4',
5: 'grade 5',
6: 'grade 6',
7: 'grade 7',
8: 'grade 8',
9: 'grade 9',
10: 'grade 10',
}[entry['grade'] as int?];
assert(radicals.length <= 1, 'There should be at most one radical per kanji');
final radical = radicals.isNotEmpty
? KanjiSearchRadical(
symbol: radicals.first['symbol'] as String,
names: (radicals.first['names'] as String?)?.split(',') ?? [],
// TODO: add radical form data
forms: [],
// TODO: add radical meaning data
meanings: [],
)
: null;
final alternativeLanguageReadings = readings
.groupListsBy((item) => item['type'] as String)
.map(
(key, value) => MapEntry(
key,
value.map((item) => item['reading'] as String).toList(),
),
);
// TODO: Add `SKIPMisclassification` to the entries
final queryCodes_ = queryCodes
.groupListsBy((item) => item['type'] as String)
.map(
(key, value) =>
MapEntry(key, value.map((item) => item['code'] as String).toList()),
);
// TODO: Add `volume` and `page` to the entries
final dictionaryReferences_ = {
for (final entry in dictionaryReferences)
entry['type'] as String: entry['ref'] as String,
};
final String? jlptLevel = {
5: 'N5',
@@ -147,12 +189,50 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
return KanjiSearchResult(
kanji: entry['literal']! as String,
taughtIn: grade,
taughtIn: entry['grade'] as int?,
jlptLevel: jlptLevel,
newspaperFrequencyRank: entry['frequency'] as int?,
strokeCount: entry['strokeCount'] as int,
meanings: meanings.map((item) => item['meaning'] as String).toList(),
kunyomi: kunyomis.map((item) => item['yomi'] as String).toList(),
onyomi: onyomis.map((item) => item['yomi'] as String).toList(),
parts: parts.map((item) => item['radical'] as String).toList(),
onyomi: onyomis
.map((item) => item['yomi'] as String)
.map(transliterateHiraganaToKatakana)
.toList(),
radical: radical,
codepoints: {
for (final codepoint in codepoints)
codepoint['type'] as String: codepoint['codepoint'] as String,
},
nanori: nanoris.map((item) => item['nanori'] as String).toList(),
alternativeLanguageReadings: alternativeLanguageReadings,
strokeMiscounts: strokeMiscounts
.map((item) => item['strokeCount'] as int)
.toList(),
queryCodes: queryCodes_,
dictionaryReferences: dictionaryReferences_,
);
}
// TODO: Use fewer queries with `IN` clauses to reduce the number of queries

/// Looks up several [kanji] at once.
///
/// Characters with no dictionary entry are omitted from the returned map.
Future<Map<String, KanjiSearchResult>> searchManyKanjiWithDbConnection(
  DatabaseExecutor connection,
  Set<String> kanji,
) async {
  if (kanji.isEmpty) {
    return {};
  }
  // Issue the per-character lookups concurrently instead of awaiting them
  // one by one; searchKanjiWithDbConnection already overlaps queries on the
  // same executor via Future.wait, so this follows the same pattern.
  final entries = await Future.wait(
    kanji.map(
      (k) async =>
          MapEntry(k, await searchKanjiWithDbConnection(connection, k)),
    ),
  );
  return {
    for (final entry in entries)
      if (entry.value != null) entry.key: entry.value!,
  };
}

View File

@@ -0,0 +1,50 @@
import 'package:jadb/table_names/radkfile.dart';
import 'package:sqflite_common/sqlite_api.dart';
// TODO: validate that the list of radicals all are valid radicals
/// Given [radicals], finds every kanji that contains all of them, collects
/// the radicals of those kanji, and returns that distinct set.
///
/// This is used to determine which additional radicals can still be
/// selected without producing an empty search result.
Future<List<String>> searchRemainingRadicalsWithDbConnection(
  DatabaseExecutor connection,
  List<String> radicals,
) async {
  // With no radicals selected the inner HAVING clause matches nothing, so
  // the result is empty anyway — return early and skip the query.
  if (radicals.isEmpty) {
    return [];
  }
  final placeholders = List.filled(radicals.length, '?').join(',');
  final queryResult = await connection.rawQuery(
    '''
      SELECT DISTINCT "radical"
      FROM "${RADKFILETableNames.radkfile}"
      WHERE "kanji" IN (
        SELECT "kanji"
        FROM "${RADKFILETableNames.radkfile}"
        WHERE "radical" IN ($placeholders)
        GROUP BY "kanji"
        HAVING COUNT(DISTINCT "radical") = ?
      )
    ''',
    [...radicals, radicals.length],
  );
  return queryResult.map((row) => row['radical'] as String).toList();
}
/// Given [radicals], returns every kanji that contains all of them.
Future<List<String>> searchKanjiByRadicalsWithDbConnection(
  DatabaseExecutor connection,
  List<String> radicals,
) async {
  // With no radicals the HAVING clause (count = 0 over a filtered-out set)
  // matches nothing, so the result is empty anyway — skip the query.
  if (radicals.isEmpty) {
    return [];
  }
  final placeholders = List.filled(radicals.length, '?').join(',');
  final queryResult = await connection.rawQuery(
    '''
      SELECT "kanji"
      FROM "${RADKFILETableNames.radkfile}"
      WHERE "radical" IN ($placeholders)
      GROUP BY "kanji"
      HAVING COUNT(DISTINCT "radical") = ?
    ''',
    [...radicals, radicals.length],
  );
  return queryResult.map((row) => row['kanji'] as String).toList();
}

View File

@@ -1,503 +0,0 @@
import 'package:collection/collection.dart';
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
import 'package:jadb/util/sqlite_utils.dart';
import 'package:sqflite_common/sqlite_api.dart';
// TODO: Support globs
// TODO: Support tags
// TODO: Prefer original kana type when sorting results
// TODO: Support mixing kana and romaji
Future<List<WordSearchResult>?> searchWordWithDbConnection(
DatabaseExecutor connection,
String word, {
bool isKana = true,
}) async {
if (word.isEmpty) {
return null;
}
late final List<int> entryIds;
if (isKana) {
entryIds = (await connection.query(
'JMdict_EntryByKana',
where: 'kana LIKE ?',
whereArgs: ['$word%'],
))
.map((row) => row['entryId'] as int)
.toList();
} else {
entryIds = (await connection.query(
'JMdict_EntryByEnglish',
where: 'english LIKE ?',
whereArgs: ['$word%'],
))
.map((row) => row['entryId'] as int)
.toList();
}
if (entryIds.isEmpty) {
return [];
}
late final List<Map<String, Object?>> senses;
final Future<List<Map<String, Object?>>> senses_query = connection.query(
'JMdict_Sense',
where: 'entryId IN (${entryIds.join(',')})',
);
late final List<Map<String, Object?>> readingElements;
final Future<List<Map<String, Object?>>> readingElements_query =
connection.query(
'JMdict_ReadingElement',
where: 'entryId IN (${entryIds.join(',')})',
);
late final List<Map<String, Object?>> kanjiElements;
final Future<List<Map<String, Object?>>> kanjiElements_query =
connection.query(
'JMdict_KanjiElement',
where: 'entryId IN (${entryIds.join(',')})',
);
await Future.wait([
senses_query.then((value) => senses = value),
readingElements_query.then((value) => readingElements = value),
kanjiElements_query.then((value) => kanjiElements = value),
]);
// Sense queries
final senseIds = senses.map((element) => element['id'] as int).toList();
late final List<Map<String, Object?>> senseAntonyms;
final Future<List<Map<String, Object?>>> senseAntonyms_query =
connection.query(
'JMdict_SenseAntonym',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseDialects;
final Future<List<Map<String, Object?>>> senseDialects_query =
connection.query(
'JMdict_SenseDialect',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseFields;
final Future<List<Map<String, Object?>>> senseFields_query = connection.query(
'JMdict_SenseField',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseGlossaries;
final Future<List<Map<String, Object?>>> senseGlossaries_query =
connection.query(
'JMdict_SenseGlossary',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseInfos;
final Future<List<Map<String, Object?>>> senseInfos_query = connection.query(
'JMdict_SenseInfo',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseLanguageSources;
final Future<List<Map<String, Object?>>> senseLanguageSources_query =
connection.query(
'JMdict_SenseLanguageSource',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseMiscs;
final Future<List<Map<String, Object?>>> senseMiscs_query = connection.query(
'JMdict_SenseMisc',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> sensePOSs;
final Future<List<Map<String, Object?>>> sensePOSs_query = connection.query(
'JMdict_SensePOS',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseRestrictedToKanjis;
final Future<List<Map<String, Object?>>> senseRestrictedToKanjis_query =
connection.query(
'JMdict_SenseRestrictedToKanji',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseRestrictedToReadings;
final Future<List<Map<String, Object?>>> senseRestrictedToReadings_query =
connection.query(
'JMdict_SenseRestrictedToReading',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> senseSeeAlsos;
final Future<List<Map<String, Object?>>> senseSeeAlsos_query =
connection.query(
'JMdict_SenseSeeAlso',
where: 'senseId IN (${senseIds.join(',')})',
);
late final List<Map<String, Object?>> exampleSentences;
final Future<List<Map<String, Object?>>> exampleSentences_query =
connection.query(
'JMdict_ExampleSentence',
where: 'senseId IN (${senseIds.join(',')})',
);
// Reading queries
final readingIds = readingElements
.map((element) => (
element['entryId'] as int,
escapeStringValue(element['reading'] as String)
))
.toList();
late final List<Map<String, Object?>> readingElementInfos;
final Future<List<Map<String, Object?>>> readingElementInfos_query =
connection.query(
'JMdict_ReadingElementInfo',
where: '(entryId, reading) IN (${readingIds.join(',')})',
);
late final List<Map<String, Object?>> readingElementRestrictions;
final Future<List<Map<String, Object?>>> readingElementRestrictions_query =
connection.query(
'JMdict_ReadingElementRestriction',
where: '(entryId, reading) IN (${readingIds.join(',')})',
);
// Kanji queries
final kanjiIds = kanjiElements
.map((element) => (
element['entryId'] as int,
escapeStringValue(element['reading'] as String)
))
.toList();
late final List<Map<String, Object?>> kanjiElementInfos;
final Future<List<Map<String, Object?>>> kanjiElementInfos_query =
connection.query(
'JMdict_KanjiElementInfo',
where: '(entryId, reading) IN (${kanjiIds.join(',')})',
);
await Future.wait([
senseAntonyms_query.then((value) => senseAntonyms = value),
senseDialects_query.then((value) => senseDialects = value),
senseFields_query.then((value) => senseFields = value),
senseGlossaries_query.then((value) => senseGlossaries = value),
senseInfos_query.then((value) => senseInfos = value),
senseLanguageSources_query.then((value) => senseLanguageSources = value),
senseMiscs_query.then((value) => senseMiscs = value),
sensePOSs_query.then((value) => sensePOSs = value),
senseRestrictedToKanjis_query
.then((value) => senseRestrictedToKanjis = value),
senseRestrictedToReadings_query
.then((value) => senseRestrictedToReadings = value),
senseSeeAlsos_query.then((value) => senseSeeAlsos = value),
exampleSentences_query.then((value) => exampleSentences = value),
readingElementInfos_query.then((value) => readingElementInfos = value),
readingElementRestrictions_query
.then((value) => readingElementRestrictions = value),
kanjiElementInfos_query.then((value) => kanjiElementInfos = value),
]);
return _regroupWordSearchResults(
entryIds: entryIds,
readingElements: readingElements,
kanjiElements: kanjiElements,
senses: senses,
senseAntonyms: senseAntonyms,
senseDialects: senseDialects,
senseFields: senseFields,
senseGlossaries: senseGlossaries,
senseInfos: senseInfos,
senseLanguageSources: senseLanguageSources,
senseMiscs: senseMiscs,
sensePOSs: sensePOSs,
senseRestrictedToKanjis: senseRestrictedToKanjis,
senseRestrictedToReadings: senseRestrictedToReadings,
senseSeeAlsos: senseSeeAlsos,
exampleSentences: exampleSentences,
readingElementInfos: readingElementInfos,
readingElementRestrictions: readingElementRestrictions,
kanjiElementInfos: kanjiElementInfos,
);
}
List<WordSearchResult> _regroupWordSearchResults({
required List<int> entryIds,
required List<Map<String, Object?>> readingElements,
required List<Map<String, Object?>> kanjiElements,
required List<Map<String, Object?>> senses,
required List<Map<String, Object?>> senseAntonyms,
required List<Map<String, Object?>> senseDialects,
required List<Map<String, Object?>> senseFields,
required List<Map<String, Object?>> senseGlossaries,
required List<Map<String, Object?>> senseInfos,
required List<Map<String, Object?>> senseLanguageSources,
required List<Map<String, Object?>> senseMiscs,
required List<Map<String, Object?>> sensePOSs,
required List<Map<String, Object?>> senseRestrictedToKanjis,
required List<Map<String, Object?>> senseRestrictedToReadings,
required List<Map<String, Object?>> senseSeeAlsos,
required List<Map<String, Object?>> exampleSentences,
required List<Map<String, Object?>> readingElementInfos,
required List<Map<String, Object?>> readingElementRestrictions,
required List<Map<String, Object?>> kanjiElementInfos,
}) {
final List<WordSearchResult> results = [];
for (final entryId in entryIds) {
final List<Map<String, Object?>> entryReadingElements = readingElements
.where((element) => element['entryId'] == entryId)
.toList();
final List<Map<String, Object?>> entryKanjiElements = kanjiElements
.where((element) => element['entryId'] == entryId)
.toList();
final List<Map<String, Object?>> entrySenses =
senses.where((element) => element['entryId'] == entryId).toList();
final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
entryId: entryId,
readingElements: entryReadingElements,
kanjiElements: entryKanjiElements,
readingElementInfos: readingElementInfos,
readingElementRestrictions: readingElementRestrictions,
kanjiElementInfos: kanjiElementInfos,
);
final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
senses: entrySenses,
senseAntonyms: senseAntonyms,
senseDialects: senseDialects,
senseFields: senseFields,
senseGlossaries: senseGlossaries,
senseInfos: senseInfos,
senseLanguageSources: senseLanguageSources,
senseMiscs: senseMiscs,
sensePOSs: sensePOSs,
senseRestrictedToKanjis: senseRestrictedToKanjis,
senseRestrictedToReadings: senseRestrictedToReadings,
senseSeeAlsos: senseSeeAlsos,
exampleSentences: exampleSentences,
);
results.add(
WordSearchResult(
entryId: entryId,
japanese: entryReadingElementsGrouped.rubys,
kanjiInfo: entryReadingElementsGrouped.kanjiInfos,
readingInfo: entryReadingElementsGrouped.readingInfos,
senses: entrySensesGrouped,
sources: const WordSearchSources(
jmdict: true,
jmnedict: false,
),
),
);
}
return results;
}
/// Intermediate result of regrouping an entry's reading and kanji rows.
class GroupedWordResult {
/// Base/furigana pairs assembled for the entry.
final List<WordSearchRuby> rubys;
/// Per-reading info, keyed by the reading string.
final Map<String, JMdictReadingInfo> readingInfos;
/// Per-kanji-spelling info, keyed by the kanji string.
final Map<String, JMdictKanjiInfo> kanjiInfos;
const GroupedWordResult({
required this.rubys,
required this.readingInfos,
required this.kanjiInfos,
});
}
GroupedWordResult _regroup_words({
required int entryId,
required List<Map<String, Object?>> kanjiElements,
required List<Map<String, Object?>> kanjiElementInfos,
required List<Map<String, Object?>> readingElements,
required List<Map<String, Object?>> readingElementInfos,
required List<Map<String, Object?>> readingElementRestrictions,
}) {
final List<WordSearchRuby> result = [];
final kanjiElements_ =
kanjiElements.where((element) => element['entryId'] == entryId).toList();
final readingElements_ = readingElements
.where((element) => element['entryId'] == entryId)
.toList();
final readingElementRestrictions_ = readingElementRestrictions
.where((element) => element['entryId'] == entryId)
.toList();
for (final readingElement in readingElements_) {
for (final kanjiElement in kanjiElements_) {
final kanji = kanjiElement['reading'] as String;
final reading = readingElement['reading'] as String;
final doesNotMatchKanji = readingElement['doesNotMatchKanji'] == 1;
if (doesNotMatchKanji) {
continue;
}
final restrictions = readingElementRestrictions_
.where((element) => element['reading'] == reading)
.toList();
if (restrictions.isNotEmpty &&
!restrictions.any((element) => element['restriction'] == kanji)) {
continue;
}
final ruby = WordSearchRuby(
base: kanji,
furigana: reading,
);
result.add(ruby);
}
}
for (final readingElement
in readingElements_.where((e) => e['doesNotMatchKanji'] == 1)) {
final reading = readingElement['reading'] as String;
final ruby = WordSearchRuby(
base: reading,
);
result.add(ruby);
}
return GroupedWordResult(
rubys: result,
readingInfos: Map.fromEntries(
readingElementInfos.map((e) => MapEntry(
e['reading'] as String,
JMdictReadingInfo.fromId(e['info'] as String),
)),
),
kanjiInfos: Map.fromEntries(
kanjiElementInfos.map((e) => MapEntry(
e['reading'] as String,
JMdictKanjiInfo.fromId(e['info'] as String),
)),
),
);
}
List<WordSearchSense> _regroup_senses({
required List<Map<String, Object?>> senses,
required List<Map<String, Object?>> senseAntonyms,
required List<Map<String, Object?>> senseDialects,
required List<Map<String, Object?>> senseFields,
required List<Map<String, Object?>> senseGlossaries,
required List<Map<String, Object?>> senseInfos,
required List<Map<String, Object?>> senseLanguageSources,
required List<Map<String, Object?>> senseMiscs,
required List<Map<String, Object?>> sensePOSs,
required List<Map<String, Object?>> senseRestrictedToKanjis,
required List<Map<String, Object?>> senseRestrictedToReadings,
required List<Map<String, Object?>> senseSeeAlsos,
required List<Map<String, Object?>> exampleSentences,
}) {
final groupedSenseAntonyms =
senseAntonyms.groupListsBy((element) => element['senseId'] as int);
final groupedSenseDialects =
senseDialects.groupListsBy((element) => element['senseId'] as int);
final groupedSenseFields =
senseFields.groupListsBy((element) => element['senseId'] as int);
final groupedSenseGlossaries =
senseGlossaries.groupListsBy((element) => element['senseId'] as int);
final groupedSenseInfos =
senseInfos.groupListsBy((element) => element['senseId'] as int);
final groupedSenseLanguageSources =
senseLanguageSources.groupListsBy((element) => element['senseId'] as int);
final groupedSenseMiscs =
senseMiscs.groupListsBy((element) => element['senseId'] as int);
final groupedSensePOSs =
sensePOSs.groupListsBy((element) => element['senseId'] as int);
final groupedSenseRestrictedToKanjis = senseRestrictedToKanjis
.groupListsBy((element) => element['senseId'] as int);
final groupedSenseRestrictedToReadings = senseRestrictedToReadings
.groupListsBy((element) => element['senseId'] as int);
final groupedSenseSeeAlsos =
senseSeeAlsos.groupListsBy((element) => element['senseId'] as int);
final List<WordSearchSense> result = [];
for (final sense in senses) {
final int senseId = sense['id'] as int;
final antonyms = groupedSenseAntonyms[senseId] ?? [];
final dialects = groupedSenseDialects[senseId] ?? [];
final fields = groupedSenseFields[senseId] ?? [];
final glossaries = groupedSenseGlossaries[senseId] ?? [];
final infos = groupedSenseInfos[senseId] ?? [];
final languageSources = groupedSenseLanguageSources[senseId] ?? [];
final miscs = groupedSenseMiscs[senseId] ?? [];
final pos = groupedSensePOSs[senseId] ?? [];
final restrictedToKanjis = groupedSenseRestrictedToKanjis[senseId] ?? [];
final restrictedToReadings =
groupedSenseRestrictedToReadings[senseId] ?? [];
final seeAlsos = groupedSenseSeeAlsos[senseId] ?? [];
final resultSense = WordSearchSense(
englishDefinitions: glossaries.map((e) => e['phrase'] as String).toList(),
partsOfSpeech:
pos.map((e) => JMdictPOS.fromId(e['pos'] as String)).toList(),
seeAlso: seeAlsos
.map((e) => WordSearchXrefEntry(
entryId: e['xrefEntryId'] as int,
ambiguous: e['ambiguous'] == 1,
))
.toList(),
antonyms: antonyms
.map((e) => WordSearchXrefEntry(
entryId: e['xrefEntryId'] as int,
ambiguous: e['ambiguous'] == 1,
))
.toList(),
restrictedToReading:
restrictedToReadings.map((e) => e['reading'] as String).toList(),
restrictedToKanji:
restrictedToKanjis.map((e) => e['kanji'] as String).toList(),
fields:
fields.map((e) => JMdictField.fromId(e['field'] as String)).toList(),
dialects: dialects
.map((e) => JMdictDialect.fromId(e['dialect'] as String))
.toList(),
misc: miscs.map((e) => JMdictMisc.fromId(e['misc'] as String)).toList(),
info: infos.map((e) => e['info'] as String).toList(),
languageSource:
languageSources.map((e) => e['language'] as String).toList(),
);
result.add(resultSense);
}
return result;
}

View File

@@ -0,0 +1,350 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqflite.dart';
/// Flat, table-shaped query results for a set of JMdict entries.
///
/// Each list holds raw database rows (column name -> value) exactly as
/// returned by the driver; the rows are regrouped into structured search
/// result models elsewhere (see `regroupWordSearchResults`).
class LinearWordQueryData {
  // Entry-level rows.
  final List<Map<String, Object?>> senses;
  final List<Map<String, Object?>> readingElements;
  final List<Map<String, Object?>> kanjiElements;
  final List<Map<String, Object?>> jlptTags;
  final List<Map<String, Object?>> commonEntries;
  // Sense-level rows, keyed by `senseId`.
  final List<Map<String, Object?>> senseAntonyms;
  final List<Map<String, Object?>> senseDialects;
  final List<Map<String, Object?>> senseFields;
  final List<Map<String, Object?>> senseGlossaries;
  final List<Map<String, Object?>> senseInfos;
  final List<Map<String, Object?>> senseLanguageSources;
  final List<Map<String, Object?>> senseMiscs;
  final List<Map<String, Object?>> sensePOSs;
  final List<Map<String, Object?>> senseRestrictedToKanjis;
  final List<Map<String, Object?>> senseRestrictedToReadings;
  final List<Map<String, Object?>> senseSeeAlsos;
  final List<Map<String, Object?>> exampleSentences;
  // Element-level rows, keyed by `elementId`.
  final List<Map<String, Object?>> readingElementInfos;
  final List<Map<String, Object?>> readingElementRestrictions;
  final List<Map<String, Object?>> kanjiElementInfos;
  // Nested data for cross-referenced (antonym / see-also) entries; null when
  // the fetch was performed with `fetchXrefData: false`.
  final LinearWordQueryData? senseAntonymData;
  final LinearWordQueryData? senseSeeAlsoData;
  const LinearWordQueryData({
    required this.senses,
    required this.readingElements,
    required this.kanjiElements,
    required this.jlptTags,
    required this.commonEntries,
    required this.senseAntonyms,
    required this.senseDialects,
    required this.senseFields,
    required this.senseGlossaries,
    required this.senseInfos,
    required this.senseLanguageSources,
    required this.senseMiscs,
    required this.sensePOSs,
    required this.senseRestrictedToKanjis,
    required this.senseRestrictedToReadings,
    required this.senseSeeAlsos,
    required this.exampleSentences,
    required this.readingElementInfos,
    required this.readingElementRestrictions,
    required this.kanjiElementInfos,
    required this.senseAntonymData,
    required this.senseSeeAlsoData,
  });
}
/// Fetches, in bulk, every raw table row needed to build word search results
/// for [entryIds].
///
/// Queries are issued concurrently and joined with [Future.wait] in waves:
/// first the entry-level tables (senses, reading/kanji elements, JLPT tags,
/// commonness), then the sense-/element-level detail tables keyed on the ids
/// collected from the first wave. When [fetchXrefData] is true, the data for
/// entries cross-referenced by antonym/see-also rows is also fetched as
/// nested [LinearWordQueryData] (with `fetchXrefData: false`, so the
/// recursion is exactly one level deep).
///
/// An empty [entryIds] list produces empty `IN ()` clauses; SQLite accepts
/// these (they match nothing), so the result is simply all-empty data.
Future<LinearWordQueryData> fetchLinearWordQueryData(
  DatabaseExecutor connection,
  List<int> entryIds, {
  bool fetchXrefData = true,
}) async {
  // Pattern used throughout: a `late final` local plus an eagerly-started
  // query future lets all queries in a wave run concurrently while still
  // ending up in single-assignment variables.
  late final List<Map<String, Object?>> senses;
  final Future<List<Map<String, Object?>>> sensesQuery = connection.query(
    JMdictTableNames.sense,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );
  late final List<Map<String, Object?>> readingElements;
  final Future<List<Map<String, Object?>>> readingelementsQuery = connection
      .query(
        JMdictTableNames.readingElement,
        where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
        whereArgs: entryIds,
        orderBy: 'orderNum',
      );
  late final List<Map<String, Object?>> kanjiElements;
  final Future<List<Map<String, Object?>>> kanjielementsQuery = connection
      .query(
        JMdictTableNames.kanjiElement,
        where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
        whereArgs: entryIds,
        orderBy: 'orderNum',
      );
  late final List<Map<String, Object?>> jlptTags;
  final Future<List<Map<String, Object?>>> jlpttagsQuery = connection.query(
    TanosJLPTTableNames.jlptTag,
    where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
    whereArgs: entryIds,
  );
  late final List<Map<String, Object?>> commonEntries;
  final Future<List<Map<String, Object?>>> commonentriesQuery = connection
      .query(
        'JMdict_EntryCommon',
        where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
        whereArgs: entryIds,
      );
  // Wave 1: entry-level tables. The sense/element ids found here key the
  // detail queries below.
  await Future.wait([
    sensesQuery.then((value) => senses = value),
    readingelementsQuery.then((value) => readingElements = value),
    kanjielementsQuery.then((value) => kanjiElements = value),
    jlpttagsQuery.then((value) => jlptTags = value),
    commonentriesQuery.then((value) => commonEntries = value),
  ]);
  // Sense queries
  final senseIds = senses.map((sense) => sense['senseId'] as int).toList();
  late final List<Map<String, Object?>> senseAntonyms;
  // Antonym xrefs, joined with the first base/furigana pairing of the target
  // entry so the xref can be displayed without fetching it.
  final Future<List<Map<String, Object?>>> senseantonymsQuery = connection
      .rawQuery(
        """
        SELECT
          "${JMdictTableNames.senseAntonyms}".senseId,
          "${JMdictTableNames.senseAntonyms}".ambiguous,
          "${JMdictTableNames.senseAntonyms}".xrefEntryId,
          "JMdict_BaseAndFurigana"."base",
          "JMdict_BaseAndFurigana"."furigana"
        FROM "${JMdictTableNames.senseAntonyms}"
        JOIN "JMdict_BaseAndFurigana"
          ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
        WHERE
          "senseId" IN (${List.filled(senseIds.length, '?').join(',')})
          AND "JMdict_BaseAndFurigana"."isFirst"
        ORDER BY
          "${JMdictTableNames.senseAntonyms}"."senseId",
          "${JMdictTableNames.senseAntonyms}"."xrefEntryId"
        """,
        [...senseIds],
      );
  late final List<Map<String, Object?>> senseDialects;
  final Future<List<Map<String, Object?>>> sensedialectsQuery = connection
      .query(
        JMdictTableNames.senseDialect,
        where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
        whereArgs: senseIds,
      );
  late final List<Map<String, Object?>> senseFields;
  final Future<List<Map<String, Object?>>> sensefieldsQuery = connection.query(
    JMdictTableNames.senseField,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseGlossaries;
  final Future<List<Map<String, Object?>>> senseglossariesQuery = connection
      .query(
        JMdictTableNames.senseGlossary,
        where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
        whereArgs: senseIds,
      );
  late final List<Map<String, Object?>> senseInfos;
  final Future<List<Map<String, Object?>>> senseinfosQuery = connection.query(
    JMdictTableNames.senseInfo,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseLanguageSources;
  final Future<List<Map<String, Object?>>> senselanguagesourcesQuery =
      connection.query(
        JMdictTableNames.senseLanguageSource,
        where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
        whereArgs: senseIds,
      );
  late final List<Map<String, Object?>> senseMiscs;
  final Future<List<Map<String, Object?>>> sensemiscsQuery = connection.query(
    JMdictTableNames.senseMisc,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> sensePOSs;
  final Future<List<Map<String, Object?>>> sensepossQuery = connection.query(
    JMdictTableNames.sensePOS,
    where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
    whereArgs: senseIds,
  );
  late final List<Map<String, Object?>> senseRestrictedToKanjis;
  final Future<List<Map<String, Object?>>> senserestrictedtokanjisQuery =
      connection.query(
        JMdictTableNames.senseRestrictedToKanji,
        where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
        whereArgs: senseIds,
      );
  late final List<Map<String, Object?>> senseRestrictedToReadings;
  final Future<List<Map<String, Object?>>> senserestrictedtoreadingsQuery =
      connection.query(
        JMdictTableNames.senseRestrictedToReading,
        where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
        whereArgs: senseIds,
      );
  late final List<Map<String, Object?>> senseSeeAlsos;
  // See-also xrefs; same shape as the antonym query above.
  final Future<List<Map<String, Object?>>> senseseealsosQuery = connection
      .rawQuery(
        """
        SELECT
          "${JMdictTableNames.senseSeeAlso}"."senseId",
          "${JMdictTableNames.senseSeeAlso}"."ambiguous",
          "${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
          "JMdict_BaseAndFurigana"."base",
          "JMdict_BaseAndFurigana"."furigana"
        FROM "${JMdictTableNames.senseSeeAlso}"
        JOIN "JMdict_BaseAndFurigana"
          ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
        WHERE
          "senseId" IN (${List.filled(senseIds.length, '?').join(',')})
          AND "JMdict_BaseAndFurigana"."isFirst"
        ORDER BY
          "${JMdictTableNames.senseSeeAlso}"."senseId",
          "${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
        """,
        [...senseIds],
      );
  late final List<Map<String, Object?>> exampleSentences;
  final Future<List<Map<String, Object?>>> examplesentencesQuery = connection
      .query(
        'JMdict_ExampleSentence',
        where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
        whereArgs: senseIds,
      );
  // Reading queries
  final readingIds = readingElements
      .map((element) => element['elementId'] as int)
      .toList();
  late final List<Map<String, Object?>> readingElementInfos;
  final Future<List<Map<String, Object?>>> readingelementinfosQuery =
      connection.query(
        JMdictTableNames.readingInfo,
        where:
            '(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
        whereArgs: readingIds,
      );
  late final List<Map<String, Object?>> readingElementRestrictions;
  final Future<List<Map<String, Object?>>> readingelementrestrictionsQuery =
      connection.query(
        JMdictTableNames.readingRestriction,
        where:
            '(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
        whereArgs: readingIds,
      );
  // Kanji queries
  final kanjiIds = kanjiElements
      .map((element) => element['elementId'] as int)
      .toList();
  late final List<Map<String, Object?>> kanjiElementInfos;
  final Future<List<Map<String, Object?>>> kanjielementinfosQuery = connection
      .query(
        JMdictTableNames.kanjiInfo,
        where:
            '(elementId) IN (${List.filled(kanjiIds.length, '?').join(',')})',
        whereArgs: kanjiIds,
      );
  // Xref data queries
  // Wave 2: the antonym/see-also rows must be awaited first, because the xref
  // entry ids they contain seed the recursive fetch below. The remaining
  // detail queries keep running in the background meanwhile.
  await Future.wait([
    senseantonymsQuery.then((value) => senseAntonyms = value),
    senseseealsosQuery.then((value) => senseSeeAlsos = value),
  ]);
  late final LinearWordQueryData? senseAntonymData;
  // `fetchXrefData: false` on the recursive calls keeps this one level deep.
  final Future<LinearWordQueryData?> senseantonymdataQuery =
      fetchXrefData
          ? fetchLinearWordQueryData(
              connection,
              senseAntonyms
                  .map((antonym) => antonym['xrefEntryId'] as int)
                  .toList(),
              fetchXrefData: false,
            )
          : Future.value(null);
  late final LinearWordQueryData? senseSeeAlsoData;
  final Future<LinearWordQueryData?> senseseealsodataQuery =
      fetchXrefData
          ? fetchLinearWordQueryData(
              connection,
              senseSeeAlsos.map((seeAlso) => seeAlso['xrefEntryId'] as int).toList(),
              fetchXrefData: false,
            )
          : Future.value(null);
  // Wave 3: everything else, including the recursive xref fetches.
  await Future.wait([
    sensedialectsQuery.then((value) => senseDialects = value),
    sensefieldsQuery.then((value) => senseFields = value),
    senseglossariesQuery.then((value) => senseGlossaries = value),
    senseinfosQuery.then((value) => senseInfos = value),
    senselanguagesourcesQuery.then((value) => senseLanguageSources = value),
    sensemiscsQuery.then((value) => senseMiscs = value),
    sensepossQuery.then((value) => sensePOSs = value),
    senserestrictedtokanjisQuery.then(
      (value) => senseRestrictedToKanjis = value,
    ),
    senserestrictedtoreadingsQuery.then(
      (value) => senseRestrictedToReadings = value,
    ),
    examplesentencesQuery.then((value) => exampleSentences = value),
    readingelementinfosQuery.then((value) => readingElementInfos = value),
    readingelementrestrictionsQuery.then(
      (value) => readingElementRestrictions = value,
    ),
    kanjielementinfosQuery.then((value) => kanjiElementInfos = value),
    senseantonymdataQuery.then((value) => senseAntonymData = value),
    senseseealsodataQuery.then((value) => senseSeeAlsoData = value),
  ]);
  return LinearWordQueryData(
    senses: senses,
    readingElements: readingElements,
    kanjiElements: kanjiElements,
    jlptTags: jlptTags,
    commonEntries: commonEntries,
    senseAntonyms: senseAntonyms,
    senseDialects: senseDialects,
    senseFields: senseFields,
    senseGlossaries: senseGlossaries,
    senseInfos: senseInfos,
    senseLanguageSources: senseLanguageSources,
    senseMiscs: senseMiscs,
    sensePOSs: sensePOSs,
    senseRestrictedToKanjis: senseRestrictedToKanjis,
    senseRestrictedToReadings: senseRestrictedToReadings,
    senseSeeAlsos: senseSeeAlsos,
    exampleSentences: exampleSentences,
    readingElementInfos: readingElementInfos,
    readingElementRestrictions: readingElementRestrictions,
    kanjiElementInfos: kanjiElementInfos,
    senseAntonymData: senseAntonymData,
    senseSeeAlsoData: senseSeeAlsoData,
  );
}

View File

@@ -0,0 +1,311 @@
import 'package:jadb/search/word_search/word_search.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/util/text_filtering.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// A JMdict entry id paired with the relevance score computed for it by a
/// search query; results are ordered by descending score.
class ScoredEntryId {
  // JMdict entry id.
  final int entryId;
  // Relevance score produced by the SQL scoring terms (see the query
  // builders in this file).
  final int score;
  const ScoredEntryId(this.entryId, this.score);
}
/// Infers a concrete [SearchMode] from the characters present in [word].
///
/// Priority: kanji (mixed with ASCII -> [SearchMode.MixedKanji]), then
/// ASCII -> [SearchMode.English], then kana -> [SearchMode.Kana]; anything
/// else falls back to [SearchMode.MixedKana].
SearchMode _determineSearchMode(String word) {
  final hasKanji = kanjiRegex.hasMatch(word);
  final hasAscii = RegExp(r'[A-Za-z]').hasMatch(word);
  if (hasKanji) {
    return hasAscii ? SearchMode.MixedKanji : SearchMode.Kanji;
  }
  if (hasAscii) {
    return SearchMode.English;
  }
  final hasKana =
      word.contains(hiraganaRegex) || word.contains(katakanaRegex);
  return hasKana ? SearchMode.Kana : SearchMode.MixedKana;
}
/// FTS reacts to certain characters, so we should filter them out.
///
/// Removes every occurrence of the FTS query operators `. - * + ( ) ^ "`
/// from [word], leaving all other characters untouched.
String _filterFTSSensitiveCharacters(String word) {
  const sensitive = ['.', '-', '*', '+', '(', ')', '^', '"'];
  var filtered = word;
  for (final character in sensitive) {
    filtered = filtered.replaceAll(character, '');
  }
  return filtered;
}
/// Builds the (SQL, bind-args) pair shared by kanji-element and
/// reading-element searches over [tableName].
///
/// The query UNIONs two candidate sets:
///  * `fts_results`: FTS prefix matches (`MATCH ? || '*'`), base score 100,
///    plus 10000 for an exact reading match;
///  * `non_fts_results`: plain LIKE substring matches not already found by
///    FTS, base score 50.
/// Both add the precomputed per-entry score from "JMdict_EntryScore"
/// (row type 'k' for kanji elements, 'r' for reading elements), then the
/// outer SELECT keeps the best score per entry, ordered descending.
///
/// With [countOnly] the query returns a single `count` column instead.
/// [pageSize]/[offset] append LIMIT/OFFSET clauses and their bind values
/// (via null-aware `?element`s, which drop null entries from the args list).
(String, List<Object?>) _kanjiReadingTemplate(
  String tableName,
  String word, {
  int? pageSize,
  int? offset,
  bool countOnly = false,
}) {
  assert(
    tableName == JMdictTableNames.kanjiElement ||
        tableName == JMdictTableNames.readingElement,
  );
  // Pagination and counting are mutually exclusive.
  assert(!countOnly || pageSize == null);
  assert(!countOnly || offset == null);
  assert(pageSize == null || pageSize > 0);
  assert(offset == null || offset >= 0);
  assert(
    offset == null || pageSize != null,
    'Offset should only be used with pageSize set',
  );
  return (
    '''
    WITH
      fts_results AS (
        SELECT DISTINCT
          "$tableName"."entryId",
          100
            + (("${tableName}FTS"."reading" = ?) * 10000)
            + "JMdict_EntryScore"."score"
          AS "score"
        FROM "${tableName}FTS"
        JOIN "$tableName" USING ("elementId")
        JOIN "JMdict_EntryScore" USING ("elementId")
        WHERE "${tableName}FTS"."reading" MATCH ? || '*'
        AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'k' : 'r'}'
      ),
      non_fts_results AS (
        SELECT DISTINCT
          "$tableName"."entryId",
          50
            + "JMdict_EntryScore"."score"
          AS "score"
        FROM "$tableName"
        JOIN "JMdict_EntryScore" USING ("elementId")
        WHERE "reading" LIKE '%' || ? || '%'
        AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
        AND "JMdict_EntryScore"."type" = '${tableName == JMdictTableNames.kanjiElement ? 'k' : 'r'}'
      )
    SELECT ${countOnly ? 'COUNT(DISTINCT "entryId") AS count' : '"entryId", MAX("score") AS "score"'}
    FROM (
      SELECT * FROM "fts_results"
      UNION
      SELECT * FROM "non_fts_results"
    )
    ${!countOnly ? 'GROUP BY "entryId"' : ''}
    ${!countOnly ? 'ORDER BY "score" DESC, "entryId" ASC' : ''}
    ${pageSize != null ? 'LIMIT ?' : ''}
    ${offset != null ? 'OFFSET ?' : ''}
    '''
        .trim(),
    [
      // Three binds: exact-match scoring term, FTS MATCH pattern, LIKE term.
      _filterFTSSensitiveCharacters(word),
      _filterFTSSensitiveCharacters(word),
      _filterFTSSensitiveCharacters(word),
      ?pageSize,
      ?offset,
    ],
  );
}
/// Runs the kanji-element variant of the shared reading query and maps the
/// resulting rows to [ScoredEntryId]s.
Future<List<ScoredEntryId>> _queryKanji(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) async {
  final (sql, args) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    pageSize: pageSize,
    offset: offset,
  );
  final rows = await connection.rawQuery(sql, args);
  return [
    for (final row in rows)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
/// Counts the entries matching [word] against kanji elements.
Future<int> _queryKanjiCount(DatabaseExecutor connection, String word) async {
  final (sql, args) = _kanjiReadingTemplate(
    JMdictTableNames.kanjiElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(sql, args);
  // An absent row is treated as a zero count.
  return rows.firstOrNull?['count'] as int? ?? 0;
}
/// Runs the reading-element variant of the shared reading query and maps the
/// resulting rows to [ScoredEntryId]s.
Future<List<ScoredEntryId>> _queryKana(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) async {
  final (sql, args) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    pageSize: pageSize,
    offset: offset,
  );
  final rows = await connection.rawQuery(sql, args);
  return [
    for (final row in rows)
      ScoredEntryId(row['entryId'] as int, row['score'] as int),
  ];
}
/// Counts the entries matching [word] against reading elements.
Future<int> _queryKanaCount(DatabaseExecutor connection, String word) async {
  final (sql, args) = _kanjiReadingTemplate(
    JMdictTableNames.readingElement,
    word,
    countOnly: true,
  );
  final rows = await connection.rawQuery(sql, args);
  // An absent row is treated as a zero count.
  return rows.firstOrNull?['count'] as int? ?? 0;
}
/// Searches glossary phrases for [word] and returns scored entry ids.
///
/// Exact phrase matches boost the entry's precomputed score: +50 when the
/// match is in the first sense, +30 in the second, +20 in any sense. Any '%'
/// in [word] is stripped from the LIKE pattern so it cannot act as a
/// wildcard.
///
/// Fix: LIMIT/OFFSET clauses are only emitted when [pageSize]/[offset] are
/// actually given, mirroring [_kanjiReadingTemplate]. The previous version
/// always emitted `LIMIT ? OFFSET ?` and bound NULL when unset, which only
/// behaves as "no limit" by SQLite-specific NULL-limit semantics.
Future<List<ScoredEntryId>> _queryEnglish(
  DatabaseExecutor connection,
  String word,
  int? pageSize,
  int? offset,
) async {
  assert(pageSize == null || pageSize > 0);
  assert(offset == null || offset >= 0);
  assert(
    offset == null || pageSize != null,
    'Offset should only be used with pageSize set',
  );
  final result = await connection.rawQuery(
    '''
    SELECT
      "${JMdictTableNames.sense}"."entryId",
      MAX("JMdict_EntryScore"."score")
        + (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
        + (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
        + (("${JMdictTableNames.senseGlossary}"."phrase" = ?) * 20)
        as "score"
    FROM "${JMdictTableNames.senseGlossary}"
    JOIN "${JMdictTableNames.sense}" USING ("senseId")
    JOIN "JMdict_EntryScore" USING ("entryId")
    WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
    GROUP BY "JMdict_EntryScore"."entryId"
    ORDER BY
      "score" DESC,
      "${JMdictTableNames.sense}"."entryId" ASC
    ${pageSize != null ? 'LIMIT ?' : ''}
    ${offset != null ? 'OFFSET ?' : ''}
    '''
        .trim(),
    // Null-aware elements drop pageSize/offset from the args when unset, so
    // the bind count always matches the emitted placeholders.
    [word, word, word, '%${word.replaceAll('%', '')}%', ?pageSize, ?offset],
  );
  return result
      .map((row) => ScoredEntryId(row['entryId'] as int, row['score'] as int))
      .toList();
}
/// Counts the distinct entries whose glossary phrases match [word].
///
/// Fix: strips '%' from the LIKE pattern exactly as `_queryEnglish` does
/// (`'%${'{word.replaceAll('%', '')}'}%'`). Previously the raw word was
/// interpolated, so a '%' in user input acted as a wildcard here but not in
/// the result query, making the count disagree with the rows returned.
Future<int> _queryEnglishCount(DatabaseExecutor connection, String word) async {
  final result = await connection.rawQuery(
    '''
    SELECT
      COUNT(DISTINCT "${JMdictTableNames.sense}"."entryId") AS "count"
    FROM "${JMdictTableNames.senseGlossary}"
    JOIN "${JMdictTableNames.sense}" USING ("senseId")
    WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
    '''
        .trim(),
    ['%${word.replaceAll('%', '')}%'],
  );
  return result.first['count'] as int;
}
/// Resolves [searchMode] (expanding [SearchMode.Auto] from the characters in
/// [word]) and dispatches to the matching query, returning scored entry ids.
///
/// Throws [UnimplementedError] for the Mixed* modes, which are not supported
/// yet.
Future<List<ScoredEntryId>> fetchEntryIds(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
  int? pageSize,
  int? offset,
) async {
  final mode = searchMode == SearchMode.Auto
      ? _determineSearchMode(word)
      : searchMode;
  assert(word.isNotEmpty, 'Word should not be empty when fetching entry IDs');
  return switch (mode) {
    SearchMode.Kanji => await _queryKanji(connection, word, pageSize, offset),
    SearchMode.Kana => await _queryKana(connection, word, pageSize, offset),
    SearchMode.English =>
      await _queryEnglish(connection, word, pageSize, offset),
    _ => throw UnimplementedError('Search mode $mode is not implemented'),
  };
}
/// Resolves [searchMode] (expanding [SearchMode.Auto] from the characters in
/// [word]) and dispatches to the matching count query.
///
/// Throws [UnimplementedError] for the Mixed* modes, which are not supported
/// yet.
Future<int?> fetchEntryIdCount(
  DatabaseExecutor connection,
  String word,
  SearchMode searchMode,
) async {
  final mode = searchMode == SearchMode.Auto
      ? _determineSearchMode(word)
      : searchMode;
  assert(word.isNotEmpty, 'Word should not be empty when fetching entry IDs');
  return switch (mode) {
    SearchMode.Kanji => await _queryKanjiCount(connection, word),
    SearchMode.Kana => await _queryKanaCount(connection, word),
    SearchMode.English => await _queryEnglishCount(connection, word),
    _ => throw UnimplementedError('Search mode $mode is not implemented'),
  };
}

View File

@@ -0,0 +1,348 @@
import 'package:collection/collection.dart';
import 'package:jadb/models/common/jlpt_level.dart';
import 'package:jadb/models/jmdict/jmdict_dialect.dart';
import 'package:jadb/models/jmdict/jmdict_field.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_misc.dart';
import 'package:jadb/models/jmdict/jmdict_pos.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sense_language_source.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/models/word_search/word_search_xref_entry.dart';
import 'package:jadb/search/word_search/data_query.dart';
import 'package:jadb/search/word_search/entry_id_query.dart';
/// Regroups the flat rows in [linearWordQueryData] into one
/// [WordSearchResult] per id in [entryIds], preserving the order (and
/// scores) of [entryIds].
List<WordSearchResult> regroupWordSearchResults({
  required List<ScoredEntryId> entryIds,
  required LinearWordQueryData linearWordQueryData,
}) {
  final List<WordSearchResult> results = [];
  // Set of entry ids flagged as "common" words.
  final commonEntryIds = linearWordQueryData.commonEntries
      .map((entry) => entry['entryId'] as int)
      .toSet();
  for (final scoredEntryId in entryIds) {
    // Slice out this entry's rows from the flat lists.
    final List<Map<String, Object?>> entryReadingElements = linearWordQueryData
        .readingElements
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    final List<Map<String, Object?>> entryKanjiElements = linearWordQueryData
        .kanjiElements
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    final List<Map<String, Object?>> entryJlptTags = linearWordQueryData
        .jlptTags
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    // Pick the first tag after a descending compareTo sort, defaulting to
    // `none` when the entry is untagged.
    final jlptLevel =
        entryJlptTags
            .map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
            .sorted((a, b) => b.compareTo(a))
            .firstOrNull ??
        JlptLevel.none;
    final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
    final List<Map<String, Object?>> entrySenses = linearWordQueryData.senses
        .where((element) => element['entryId'] == scoredEntryId.entryId)
        .toList();
    // Pair readings with kanji surfaces and collect info annotations.
    final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
      entryId: scoredEntryId.entryId,
      readingElements: entryReadingElements,
      kanjiElements: entryKanjiElements,
      readingElementInfos: linearWordQueryData.readingElementInfos,
      readingElementRestrictions:
          linearWordQueryData.readingElementRestrictions,
      kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
    );
    // Build the structured senses, including nested xref results when xref
    // data was fetched.
    final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
      senses: entrySenses,
      senseAntonyms: linearWordQueryData.senseAntonyms,
      senseDialects: linearWordQueryData.senseDialects,
      senseFields: linearWordQueryData.senseFields,
      senseGlossaries: linearWordQueryData.senseGlossaries,
      senseInfos: linearWordQueryData.senseInfos,
      senseLanguageSources: linearWordQueryData.senseLanguageSources,
      senseMiscs: linearWordQueryData.senseMiscs,
      sensePOSs: linearWordQueryData.sensePOSs,
      senseRestrictedToKanjis: linearWordQueryData.senseRestrictedToKanjis,
      senseRestrictedToReadings: linearWordQueryData.senseRestrictedToReadings,
      senseSeeAlsos: linearWordQueryData.senseSeeAlsos,
      exampleSentences: linearWordQueryData.exampleSentences,
      senseSeeAlsosXrefData: linearWordQueryData.senseSeeAlsoData,
      senseAntonymsXrefData: linearWordQueryData.senseAntonymData,
    );
    results.add(
      WordSearchResult(
        score: scoredEntryId.score,
        entryId: scoredEntryId.entryId,
        isCommon: isCommon,
        japanese: entryReadingElementsGrouped.rubys,
        kanjiInfo: entryReadingElementsGrouped.kanjiInfos,
        readingInfo: entryReadingElementsGrouped.readingInfos,
        senses: entrySensesGrouped,
        jlptLevel: jlptLevel,
        sources: const WordSearchSources(jmdict: true, jmnedict: false),
      ),
    );
  }
  return results;
}
/// The reading/kanji pairings of one entry plus per-surface info
/// annotations, as produced by `_regroup_words`.
class GroupedWordResult {
  // Kanji/furigana (or kana-only) pairings.
  final List<WordSearchRuby> rubys;
  // Reading text -> reading info annotation.
  final Map<String, JMdictReadingInfo> readingInfos;
  // Kanji text -> kanji info annotation.
  final Map<String, JMdictKanjiInfo> kanjiInfos;
  const GroupedWordResult({
    required this.rubys,
    required this.readingInfos,
    required this.kanjiInfos,
  });
}
/// Pairs each reading of [entryId] with the kanji surfaces it applies to,
/// and maps element-info rows back to their surface text.
///
/// A reading marked `doesNotMatchKanji` (or any reading of a kanji-less
/// entry) becomes a kana-only ruby; otherwise the reading is combined with
/// every kanji element it is not restricted away from.
GroupedWordResult _regroup_words({
  required int entryId,
  required List<Map<String, Object?>> kanjiElements,
  required List<Map<String, Object?>> kanjiElementInfos,
  required List<Map<String, Object?>> readingElements,
  required List<Map<String, Object?>> readingElementInfos,
  required List<Map<String, Object?>> readingElementRestrictions,
}) {
  final List<WordSearchRuby> rubys = [];
  // Narrow all inputs to this entry's rows.
  final kanjiElements_ = kanjiElements
      .where((element) => element['entryId'] == entryId)
      .toList();
  final readingElements_ = readingElements
      .where((element) => element['entryId'] == entryId)
      .toList();
  // NOTE(review): assumes restriction rows carry 'entryId' and 'reading'
  // columns (they were queried by elementId) — verify against the schema.
  final readingElementRestrictions_ = readingElementRestrictions
      .where((element) => element['entryId'] == entryId)
      .toList();
  for (final readingElement in readingElements_) {
    if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
      // Kana-only surface: no kanji base to attach furigana to.
      final ruby = WordSearchRuby(base: readingElement['reading'] as String);
      rubys.add(ruby);
      continue;
    }
    for (final kanjiElement in kanjiElements_) {
      final kanji = kanjiElement['reading'] as String;
      final reading = readingElement['reading'] as String;
      final restrictions = readingElementRestrictions_
          .where((element) => element['reading'] == reading)
          .toList();
      // A restricted reading only pairs with the kanji it is restricted to.
      if (restrictions.isNotEmpty &&
          !restrictions.any((element) => element['restriction'] == kanji)) {
        continue;
      }
      final ruby = WordSearchRuby(base: kanji, furigana: reading);
      rubys.add(ruby);
    }
  }
  assert(rubys.isNotEmpty, 'No readings found for entryId: $entryId');
  // Lookup tables from element id back to its surface text, used to key the
  // info maps below by text instead of id.
  final Map<int, String> readingElementIdsToReading = {
    for (final element in readingElements_)
      element['elementId'] as int: element['reading'] as String,
  };
  final Map<int, String> kanjiElementIdsToReading = {
    for (final element in kanjiElements_)
      element['elementId'] as int: element['reading'] as String,
  };
  // NOTE(review): assumes info rows carry an 'entryId' column — verify; if
  // they only have 'elementId', these filters would silently drop all rows.
  final readingElementInfos_ = readingElementInfos
      .where((element) => element['entryId'] == entryId)
      .toList();
  final kanjiElementInfos_ = kanjiElementInfos
      .where((element) => element['entryId'] == entryId)
      .toList();
  return GroupedWordResult(
    rubys: rubys,
    // When a surface has several info rows, later ones overwrite earlier
    // ones in these maps.
    readingInfos: {
      for (final rei in readingElementInfos_)
        readingElementIdsToReading[rei['elementId'] as int]!:
            JMdictReadingInfo.fromId(rei['info'] as String),
    },
    kanjiInfos: {
      for (final kei in kanjiElementInfos_)
        kanjiElementIdsToReading[kei['elementId'] as int]!:
            JMdictKanjiInfo.fromId(kei['info'] as String),
    },
  );
}
/// Builds one [WordSearchSense] per row in [senses] by joining in the
/// sense-level detail rows (grouped by `senseId`).
///
/// When [senseSeeAlsosXrefData]/[senseAntonymsXrefData] are non-null, the
/// cross-referenced entries are regrouped into full [WordSearchResult]s and
/// attached to the corresponding xref entries.
List<WordSearchSense> _regroup_senses({
  required List<Map<String, Object?>> senses,
  required List<Map<String, Object?>> senseAntonyms,
  required List<Map<String, Object?>> senseDialects,
  required List<Map<String, Object?>> senseFields,
  required List<Map<String, Object?>> senseGlossaries,
  required List<Map<String, Object?>> senseInfos,
  required List<Map<String, Object?>> senseLanguageSources,
  required List<Map<String, Object?>> senseMiscs,
  required List<Map<String, Object?>> sensePOSs,
  required List<Map<String, Object?>> senseRestrictedToKanjis,
  required List<Map<String, Object?>> senseRestrictedToReadings,
  required List<Map<String, Object?>> senseSeeAlsos,
  required List<Map<String, Object?>> exampleSentences,
  required LinearWordQueryData? senseSeeAlsosXrefData,
  required LinearWordQueryData? senseAntonymsXrefData,
}) {
  // Group every detail table by senseId for O(1) lookup in the loop below.
  final groupedSenseAntonyms = senseAntonyms.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseDialects = senseDialects.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseFields = senseFields.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseGlossaries = senseGlossaries.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseInfos = senseInfos.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseLanguageSources = senseLanguageSources.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseMiscs = senseMiscs.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSensePOSs = sensePOSs.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseRestrictedToKanjis = senseRestrictedToKanjis.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final groupedSenseRestrictedToReadings = senseRestrictedToReadings
      .groupListsBy((element) => element['senseId'] as int);
  final groupedSenseSeeAlsos = senseSeeAlsos.groupListsBy(
    (element) => element['senseId'] as int,
  );
  final List<WordSearchSense> result = [];
  for (final sense in senses) {
    final int senseId = sense['senseId'] as int;
    final antonyms = groupedSenseAntonyms[senseId] ?? [];
    final dialects = groupedSenseDialects[senseId] ?? [];
    final fields = groupedSenseFields[senseId] ?? [];
    final glossaries = groupedSenseGlossaries[senseId] ?? [];
    final infos = groupedSenseInfos[senseId] ?? [];
    final languageSources = groupedSenseLanguageSources[senseId] ?? [];
    final miscs = groupedSenseMiscs[senseId] ?? [];
    final pos = groupedSensePOSs[senseId] ?? [];
    final restrictedToKanjis = groupedSenseRestrictedToKanjis[senseId] ?? [];
    final restrictedToReadings =
        groupedSenseRestrictedToReadings[senseId] ?? [];
    final seeAlsos = groupedSenseSeeAlsos[senseId] ?? [];
    // NOTE(review): the xref result lists below are consumed by positional
    // index `i`, assuming regroupWordSearchResults returns exactly one
    // result per xref row, in order — confirm this invariant holds.
    final List<WordSearchResult> seeAlsosWordResults =
        senseSeeAlsosXrefData != null
            ? regroupWordSearchResults(
                entryIds: seeAlsos
                    .map((e) => ScoredEntryId(e['xrefEntryId'] as int, 0))
                    .toList(),
                linearWordQueryData: senseSeeAlsosXrefData,
              )
            : [];
    final List<WordSearchResult> antonymsWordResults =
        senseAntonymsXrefData != null
            ? regroupWordSearchResults(
                entryIds: antonyms
                    .map((e) => ScoredEntryId(e['xrefEntryId'] as int, 0))
                    .toList(),
                linearWordQueryData: senseAntonymsXrefData,
              )
            : [];
    final resultSense = WordSearchSense(
      englishDefinitions: glossaries.map((e) => e['phrase'] as String).toList(),
      partsOfSpeech: pos
          .map((e) => JMdictPOS.fromId(e['pos'] as String))
          .toList(),
      seeAlso: seeAlsos.asMap().entries.map<WordSearchXrefEntry>((mapEntry) {
        final i = mapEntry.key;
        final e = mapEntry.value;
        return WordSearchXrefEntry(
          entryId: e['xrefEntryId'] as int,
          baseWord: e['base'] as String,
          furigana: e['furigana'] as String?,
          ambiguous: e['ambiguous'] == 1,
          xrefResult: seeAlsosWordResults.isNotEmpty
              ? seeAlsosWordResults[i]
              : null,
        );
      }).toList(),
      antonyms: antonyms.asMap().entries.map<WordSearchXrefEntry>((mapEntry) {
        final i = mapEntry.key;
        final e = mapEntry.value;
        return WordSearchXrefEntry(
          entryId: e['xrefEntryId'] as int,
          baseWord: e['base'] as String,
          furigana: e['furigana'] as String?,
          ambiguous: e['ambiguous'] == 1,
          xrefResult: antonymsWordResults.isNotEmpty
              ? antonymsWordResults[i]
              : null,
        );
      }).toList(),
      restrictedToReading: restrictedToReadings
          .map((e) => e['reading'] as String)
          .toList(),
      restrictedToKanji: restrictedToKanjis
          .map((e) => e['kanji'] as String)
          .toList(),
      fields: fields
          .map((e) => JMdictField.fromId(e['field'] as String))
          .toList(),
      dialects: dialects
          .map((e) => JMdictDialect.fromId(e['dialect'] as String))
          .toList(),
      misc: miscs.map((e) => JMdictMisc.fromId(e['misc'] as String)).toList(),
      info: infos.map((e) => e['info'] as String).toList(),
      languageSource: languageSources
          .map(
            (e) => WordSearchSenseLanguageSource(
              language: e['language'] as String,
              phrase: e['phrase'] as String?,
              fullyDescribesSense: e['fullyDescribesSense'] == 1,
              constructedFromSmallerWords:
                  e['constructedFromSmallerWords'] == 1,
            ),
          )
          .toList(),
    );
    result.add(resultSense);
  }
  return result;
}

View File

@@ -0,0 +1,131 @@
// TODO: Support globs
// TODO: Support tags
// TODO: Prefer original kana type when sorting results
// TODO: Support mixing kana and romaji
//
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/search/word_search/data_query.dart';
import 'package:jadb/search/word_search/entry_id_query.dart';
import 'package:jadb/search/word_search/regrouping.dart';
import 'package:jadb/table_names/jmdict.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// How a query string should be interpreted when searching for words.
///
/// [Auto] infers a concrete mode from the characters in the query; the
/// Mixed* modes are currently unimplemented (see `fetchEntryIds`).
enum SearchMode { Auto, English, Kanji, MixedKanji, Kana, MixedKana }
/// Searches JMdict for [word] and returns structured results.
///
/// Returns null for an empty [word], an empty list when nothing matched,
/// and otherwise one result per matching entry. [page]/[pageSize] control
/// pagination; when [pageSize] is null, all matches are returned.
Future<List<WordSearchResult>?> searchWordWithDbConnection(
  DatabaseExecutor connection,
  String word, {
  SearchMode searchMode = SearchMode.Auto,
  int page = 0,
  int? pageSize,
}) async {
  if (word.isEmpty) return null;
  final offset = pageSize == null ? null : page * pageSize;
  final scoredIds = await fetchEntryIds(
    connection,
    word,
    searchMode,
    pageSize,
    offset,
  );
  if (scoredIds.isEmpty) {
    // TODO: try conjugation search
    return [];
  }
  final data = await fetchLinearWordQueryData(
    connection,
    [for (final scored in scoredIds) scored.entryId],
  );
  return regroupWordSearchResults(
    entryIds: scoredIds,
    linearWordQueryData: data,
  );
}
/// Counts how many entries a search for [word] would match.
///
/// Returns null for an empty [word].
Future<int?> searchWordCountWithDbConnection(
  DatabaseExecutor connection,
  String word, {
  SearchMode searchMode = SearchMode.Auto,
}) async {
  if (word.isEmpty) return null;
  return await fetchEntryIdCount(connection, word, searchMode);
}
/// Looks up a single entry by its JMdict [id].
///
/// Returns null for non-positive ids and for ids not present in the entry
/// table.
Future<WordSearchResult?> getWordByIdWithDbConnection(
  DatabaseExecutor connection,
  int id,
) async {
  if (id <= 0) return null;
  // Cheap existence probe before the heavy multi-table fetch.
  final existsRows = await connection.rawQuery(
    'SELECT EXISTS(SELECT 1 FROM "${JMdictTableNames.entry}" WHERE "entryId" = ?)',
    [id],
  );
  final exists = existsRows.isNotEmpty && existsRows.first.values.first == 1;
  if (!exists) return null;
  final data = await fetchLinearWordQueryData(connection, [id]);
  final results = regroupWordSearchResults(
    entryIds: [ScoredEntryId(id, 0)],
    linearWordQueryData: data,
  );
  assert(
    results.length == 1,
    'Expected exactly one result for entryId $id, but got ${results.length}',
  );
  return results.firstOrNull;
}
/// Fetches multiple word entries at once, keyed by their JMdict entry id.
///
/// Ids with no matching entry are simply absent from the returned map.
/// An empty [ids] set yields an empty map without touching the database.
Future<Map<int, WordSearchResult>> getWordsByIdsWithDbConnection(
  DatabaseExecutor connection,
  Set<int> ids,
) async {
  if (ids.isEmpty) {
    return {};
  }
  final idList = ids.toList();
  final data = await fetchLinearWordQueryData(connection, idList);
  final results = regroupWordSearchResults(
    // Relevance scores are meaningless for direct id lookup, so use 0.
    entryIds: [for (final id in idList) ScoredEntryId(id, 0)],
    linearWordQueryData: data,
  );
  return {for (final r in results) r.entryId: r};
}

View File

@@ -1,7 +1,5 @@
abstract class JMdictTableNames {
static const String entry = 'JMdict_Entry';
static const String entryByKana = 'JMdict_EntryByKana';
static const String entryByEnglish = 'JMdict_EntryByEnglish';
static const String kanjiElement = 'JMdict_KanjiElement';
static const String kanjiInfo = 'JMdict_KanjiElementInfo';
static const String readingElement = 'JMdict_ReadingElement';
@@ -22,25 +20,23 @@ abstract class JMdictTableNames {
static const String senseSeeAlso = 'JMdict_SenseSeeAlso';
static Set<String> get allTables => {
entry,
entryByKana,
entryByEnglish,
kanjiElement,
kanjiInfo,
readingElement,
readingInfo,
readingRestriction,
sense,
senseAntonyms,
senseDialect,
senseField,
senseGlossary,
senseInfo,
senseMisc,
sensePOS,
senseLanguageSource,
senseRestrictedToKanji,
senseRestrictedToReading,
senseSeeAlso
};
entry,
kanjiElement,
kanjiInfo,
readingElement,
readingInfo,
readingRestriction,
sense,
senseAntonyms,
senseDialect,
senseField,
senseGlossary,
senseInfo,
senseMisc,
sensePOS,
senseLanguageSource,
senseRestrictedToKanji,
senseRestrictedToReading,
senseSeeAlso,
};
}

View File

@@ -17,19 +17,19 @@ abstract class KANJIDICTableNames {
static const String nanori = 'KANJIDIC_Nanori';
static Set<String> get allTables => {
character,
radicalName,
codepoint,
radical,
strokeMiscount,
variant,
dictionaryReference,
dictionaryReferenceMoro,
queryCode,
reading,
kunyomi,
onyomi,
meaning,
nanori
};
character,
radicalName,
codepoint,
radical,
strokeMiscount,
variant,
dictionaryReference,
dictionaryReferenceMoro,
queryCode,
reading,
kunyomi,
onyomi,
meaning,
nanori,
};
}

View File

@@ -1,7 +1,5 @@
abstract class RADKFILETableNames {
static const String radkfile = 'RADKFILE';
static Set<String> get allTables => {
radkfile,
};
static Set<String> get allTables => {radkfile};
}

View File

@@ -276,28 +276,22 @@ extension on DateTime {
/// See more info here:
/// - https://en.wikipedia.org/wiki/Nanboku-ch%C5%8D_period
/// - http://www.kumamotokokufu-h.ed.jp/kumamoto/bungaku/nengoui.html
String? japaneseEra({bool nanbokuchouPeriodUsesNorth = true}) {
String? japaneseEra() {
throw UnimplementedError('This function is not implemented yet.');
if (this.year < 645) {
if (year < 645) {
return null;
}
if (this.year < periodsNanbokuchouNorth.keys.first.$1) {
if (year < periodsNanbokuchouNorth.keys.first.$1) {
// TODO: find first where year <= this.year and jump one period back.
}
}
String get japaneseWeekdayPrefix => [
'',
'',
'',
'',
'',
'',
'',
][weekday - 1];
String get japaneseWeekdayPrefix =>
['', '', '', '', '', '', ''][weekday - 1];
/// Returns the date in Japanese format.
String japaneseDate({bool showWeekday = false}) => '$month月$day日' + (showWeekday ? '$japaneseWeekdayPrefix' : '');
String japaneseDate({bool showWeekday = false}) =>
'$month月$day日${showWeekday ? '$japaneseWeekdayPrefix' : ''}';
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,241 @@
import 'package:jadb/util/lemmatizer/rules.dart';
/// Grammatical word classes used to constrain which lemmatization rules may
/// follow one another while deconjugating.
enum WordClass {
  noun,
  ichidanVerb,
  godanVerb,
  irregularVerb,
  iAdjective,
  nAdjective,
  adverb,
  particle,
  // Pseudo-class for the synthetic root rule that wraps the raw input word.
  input,
}
/// Whether a rule's patterns attach at the start (prefix) or the end
/// (suffix) of a word.
enum LemmatizationRuleType { prefix, suffix }
/// A single deconjugation rule.
///
/// A rule recognizes an inflected affix via [pattern] and rewrites it back
/// towards the base form. [validChildClasses] restricts which word classes
/// may be tried on the rewritten result; `null` means no restriction.
/// [terminal] rules end a deconjugation chain (e.g. a base form).
class LemmatizationRule {
  /// Human-readable rule name, shown in lemmatization traces.
  final String name;

  /// The affix pattern this rule recognizes and rewrites.
  final AllomorphPattern pattern;

  /// The word class this rule produces.
  final WordClass wordClass;

  /// Word classes allowed for follow-up rules, or `null` for any.
  final List<WordClass>? validChildClasses;

  /// Whether this rule terminates the deconjugation chain.
  final bool terminal;

  const LemmatizationRule({
    required this.name,
    required this.pattern,
    required this.wordClass,
    this.validChildClasses,
    this.terminal = false,
  });

  /// Whether [word] contains an affix this rule recognizes.
  bool matches(String word) => pattern.matches(word);

  /// Rewrites [word] towards its base form(s), or `null` if nothing matched.
  List<String>? apply(String word) => pattern.apply(word);

  /// Convenience constructor for single-pattern rules.
  ///
  /// A [replacement] of `null` means the matched affix is stripped outright.
  // Named parameters below were implicitly `dynamic`; they are now typed to
  // match the main constructor and [AllomorphPattern].
  LemmatizationRule.simple({
    required String name,
    required String pattern,
    required String? replacement,
    required WordClass wordClass,
    List<WordClass>? validChildClasses,
    bool terminal = false,
    List<Pattern> lookAheadBehind = const [''],
    LemmatizationRuleType type = LemmatizationRuleType.suffix,
  }) : this(
         name: name,
         pattern: AllomorphPattern(
           patterns: {
             pattern: replacement != null ? [replacement] : null,
           },
           type: type,
           lookAheadBehind: lookAheadBehind,
         ),
         validChildClasses: validChildClasses,
         terminal: terminal,
         wordClass: wordClass,
       );
}
/// Represents a set of patterns for matching allomorphs in a word.
/// The patterns can be either a prefix or a suffix, and they can include
/// replacement characters for deconjugating into base forms.
class AllomorphPattern {
  /// Context patterns required adjacent to the affix (look-behind for
  /// suffix rules, look-ahead for prefix rules). The default `['']` means
  /// no context is required.
  final List<Pattern> lookAheadBehind;

  /// Maps each affix to its replacement forms; a `null` value means the
  /// affix is stripped with no replacement.
  final Map<String, List<String>?> patterns;

  /// Whether the affixes attach at the start or the end of the word.
  final LemmatizationRuleType type;

  const AllomorphPattern({
    required this.patterns,
    required this.type,
    this.lookAheadBehind = const [''],
  });

  /// Cross product of [patterns] keys and [lookAheadBehind] contexts.
  ///
  /// Each element pairs the bare affix with the combined pattern to match
  /// against a word. String contexts are concatenated directly; RegExp
  /// contexts are wrapped into an anchored two-group RegExp whose group
  /// order depends on [type].
  List<(String, Pattern)> get allPatternCombinations {
    final combinations = <(String, Pattern)>[];
    for (final l in lookAheadBehind) {
      for (final p in patterns.keys) {
        switch ((type, l is RegExp)) {
          case (LemmatizationRuleType.prefix, true):
            // Group 1 = affix, group 2 = look-ahead context.
            combinations.add((p, RegExp('^($p)(${(l as RegExp).pattern})')));
            break;
          case (LemmatizationRuleType.prefix, false):
            combinations.add((p, '$p$l'));
            break;
          case (LemmatizationRuleType.suffix, true):
            // Group 1 = look-behind context, group 2 = affix.
            combinations.add((p, RegExp('(${(l as RegExp).pattern})($p)\$')));
            break;
          case (LemmatizationRuleType.suffix, false):
            combinations.add((p, '$l$p'));
            break;
        }
      }
    }
    return combinations;
  }

  /// Whether any affix/context combination matches [word].
  bool matches(String word) {
    for (final (_, p) in allPatternCombinations) {
      if (p is String) {
        if (type == LemmatizationRuleType.prefix
            ? word.startsWith(p)
            : word.endsWith(p)) {
          return true;
        }
      } else if (p is RegExp) {
        if (p.hasMatch(word)) {
          return true;
        }
      }
    }
    return false;
  }

  /// Applies the first matching combination to [word] and returns the
  /// rewritten candidate base forms, or `null` when nothing matches.
  ///
  /// NOTE(review): in the plain-string branches only the affix's length is
  /// removed, so string context characters stay attached to the remainder —
  /// presumably intentional (the context is part of the stem); confirm
  /// against the rule data.
  List<String>? apply(String word) {
    for (final (affix, p) in allPatternCombinations) {
      switch ((type, p is RegExp)) {
        case (LemmatizationRuleType.prefix, true):
          final match = (p as RegExp).firstMatch(word);
          if (match != null) {
            final prefix = match.group(1)!;
            assert(prefix == affix);
            final suffix = word.substring(prefix.length);
            // A null replacement list means "strip the affix".
            return patterns[prefix] != null
                ? patterns[prefix]!.map((s) => s + suffix).toList()
                : [suffix];
          }
          break;
        case (LemmatizationRuleType.prefix, false):
          if (word.startsWith(p as String)) {
            return patterns[affix] != null
                ? patterns[affix]!
                    .map((s) => s + word.substring(affix.length))
                    .toList()
                : [word.substring(affix.length)];
          }
          break;
        case (LemmatizationRuleType.suffix, true):
          final match = (p as RegExp).firstMatch(word);
          if (match != null) {
            final suffix = match.group(2)!;
            assert(suffix == affix);
            final prefix = word.substring(0, word.length - suffix.length);
            return patterns[suffix] != null
                ? patterns[suffix]!.map((s) => prefix + s).toList()
                : [prefix];
          }
          break;
        case (LemmatizationRuleType.suffix, false):
          if (word.endsWith(p as String)) {
            final prefix = word.substring(0, word.length - affix.length);
            return patterns[affix] != null
                ? patterns[affix]!.map((s) => prefix + s).toList()
                : [prefix];
          }
          break;
      }
    }
    return null;
  }
}
/// One node in a deconjugation tree: [original] was matched by [rule],
/// producing the [variant]-th rewritten form, which was further analyzed
/// into [children].
class Lemmatized {
  /// The (still inflected) word this node analyzed.
  final String original;

  /// The rule that matched [original].
  final LemmatizationRule rule;

  /// Index into the rule's replacement list selecting which rewritten form
  /// this node represents.
  final int variant;

  /// Deconjugation results for the rewritten form.
  final List<Lemmatized> children;

  const Lemmatized({
    required this.original,
    required this.rule,
    this.variant = 0,
    this.children = const [],
  });

  /// The rewritten form selected by [variant], or `null` when the rule no
  /// longer applies or [variant] is out of range.
  String? get applied {
    final forms = rule.apply(original);
    // Bounds-check the index as well as null/empty: an out-of-range
    // [variant] previously threw a RangeError here.
    if (forms == null || variant < 0 || variant >= forms.length) {
      return null;
    }
    return forms[variant];
  }

  @override
  String toString() {
    final header = '$original (${rule.name}) -> ${applied ?? '<null>'}';
    if (children.isEmpty) {
      return header;
    }
    // Render children as an indented bullet list under this node.
    final childrenString = children
        .map((c) => ' - ${c.toString().split('\n').join('\n ')}')
        .join('\n');
    return '$header\n$childrenString';
  }
}
/// Recursively applies every applicable rule to [word], building the
/// deconjugation subtree that hangs below [parentRule].
List<Lemmatized> _lemmatize(LemmatizationRule parentRule, String word) {
  // Terminal rules (base forms) end the chain with no children.
  if (parentRule.terminal) {
    return <Lemmatized>[];
  }
  // Restrict candidate rules to the parent's allowed child classes, if any.
  final candidateRules = parentRule.validChildClasses == null
      ? lemmatizationRules
      : lemmatizationRules.where(
          (r) => parentRule.validChildClasses!.contains(r.wordClass),
        );
  final nodes = <Lemmatized>[];
  for (final rule in candidateRules) {
    if (!rule.matches(word)) {
      continue;
    }
    // Each replacement variant becomes its own subtree.
    for (final (index, form) in (rule.apply(word) ?? []).indexed) {
      nodes.add(
        Lemmatized(
          original: word,
          rule: rule,
          variant: index,
          children: _lemmatize(rule, form),
        ),
      );
    }
  }
  return nodes;
}
/// Deconjugates [word] and returns the full analysis tree.
///
/// The root node uses a synthetic "Input" rule so that the raw word and all
/// candidate deconjugations share one uniform tree structure.
Lemmatized lemmatize(String word) {
  final root = LemmatizationRule.simple(
    name: 'Input',
    pattern: '',
    replacement: null,
    wordClass: WordClass.input,
  );
  return Lemmatized(
    original: word,
    rule: root,
    children: _lemmatize(root, word),
  );
}

View File

@@ -0,0 +1,10 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
import 'package:jadb/util/lemmatizer/rules/godan-verbs.dart';
import 'package:jadb/util/lemmatizer/rules/i-adjectives.dart';
import 'package:jadb/util/lemmatizer/rules/ichidan-verbs.dart';
/// All lemmatization rules, in the order they are tried.
// `final` so the top-level list reference cannot be reassigned at runtime;
// nothing in the codebase writes to it.
final List<LemmatizationRule> lemmatizationRules = [
  ...ichidanVerbLemmatizationRules,
  ...godanVerbLemmatizationRules,
  ...iAdjectiveLemmatizationRules,
];

View File

@@ -0,0 +1,457 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
List<LemmatizationRule> godanVerbLemmatizationRules = [
LemmatizationRule(
name: 'Godan verb - base form',
terminal: true,
pattern: AllomorphPattern(
patterns: {
'': [''],
'': [''],
'': [''],
'': [''],
'': [''],
'': [''],
'': [''],
'': [''],
'': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative form',
pattern: AllomorphPattern(
patterns: {
'わない': [''],
'かない': [''],
'がない': [''],
'さない': [''],
'たない': [''],
'なない': [''],
'ばない': [''],
'まない': [''],
'らない': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - past form',
pattern: AllomorphPattern(
patterns: {
'した': [''],
'った': ['', '', ''],
'んだ': ['', '', ''],
'いだ': [''],
'いた': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - te-form',
pattern: AllomorphPattern(
patterns: {
'いて': ['', ''],
'して': [''],
'って': ['', '', ''],
'んで': ['', '', ''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - te-form with いる',
pattern: AllomorphPattern(
patterns: {
'いている': ['', ''],
'している': [''],
'っている': ['', '', ''],
'んでいる': ['', '', ''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - te-form with いた',
pattern: AllomorphPattern(
patterns: {
'いていた': ['', ''],
'していた': [''],
'っていた': ['', '', ''],
'んでいた': ['', '', ''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - conditional form',
pattern: AllomorphPattern(
patterns: {
'けば': [''],
'げば': [''],
'せば': [''],
'てば': ['', '', ''],
'ねば': [''],
'べば': [''],
'めば': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - volitional form',
pattern: AllomorphPattern(
patterns: {
'おう': [''],
'こう': [''],
'ごう': [''],
'そう': [''],
'とう': ['', '', ''],
'のう': [''],
'ぼう': [''],
'もう': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - potential form',
pattern: AllomorphPattern(
patterns: {
'ける': [''],
'げる': [''],
'せる': [''],
'てる': ['', '', ''],
'ねる': [''],
'べる': [''],
'める': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - passive form',
pattern: AllomorphPattern(
patterns: {
'かれる': [''],
'がれる': [''],
'される': [''],
'たれる': ['', '', ''],
'なれる': [''],
'ばれる': [''],
'まれる': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - causative form',
pattern: AllomorphPattern(
patterns: {
'かせる': [''],
'がせる': [''],
'させる': [''],
'たせる': ['', '', ''],
'なせる': [''],
'ばせる': [''],
'ませる': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - causative-passive form',
pattern: AllomorphPattern(
patterns: {
'かされる': [''],
'がされる': [''],
'される': [''],
'たされる': ['', '', ''],
'なされる': [''],
'ばされる': [''],
'まされる': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - imperative form',
pattern: AllomorphPattern(
patterns: {
'': [''],
'': [''],
'': [''],
'': [''],
'': ['', '', ''],
'': [''],
'': [''],
'': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative past form',
pattern: AllomorphPattern(
patterns: {
'わなかった': [''],
'かなかった': [''],
'がなかった': [''],
'さなかった': [''],
'たなかった': [''],
'ななかった': [''],
'ばなかった': [''],
'まなかった': [''],
'らなかった': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative te-form',
pattern: AllomorphPattern(
patterns: {
'わなくて': [''],
'かなくて': [''],
'がなくて': [''],
'さなくて': [''],
'たなくて': [''],
'ななくて': [''],
'ばなくて': [''],
'まなくて': [''],
'らなくて': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative conditional form',
pattern: AllomorphPattern(
patterns: {
'わなければ': [''],
'かなければ': [''],
'がなければ': [''],
'さなければ': [''],
'たなければ': [''],
'ななければ': [''],
'ばなければ': [''],
'まなければ': [''],
'らなければ': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative volitional form',
pattern: AllomorphPattern(
patterns: {
'うまい': [''],
'くまい': [''],
'ぐまい': [''],
'すまい': [''],
'つまい': ['', '', ''],
'ぬまい': [''],
'ぶまい': [''],
'むまい': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative potential form',
pattern: AllomorphPattern(
patterns: {
'けない': [''],
'げない': [''],
'せない': [''],
'てない': ['', '', ''],
'ねない': [''],
'べない': [''],
'めない': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative passive form',
pattern: AllomorphPattern(
patterns: {
'かれない': [''],
'がれない': [''],
'されない': [''],
'たれない': ['', '', ''],
'なれない': [''],
'ばれない': [''],
'まれない': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative causative form',
pattern: AllomorphPattern(
patterns: {
'かせない': [''],
'がせない': [''],
'させない': [''],
'たせない': ['', '', ''],
'なせない': [''],
'ばせない': [''],
'ませない': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative causative-passive form',
pattern: AllomorphPattern(
patterns: {
'かされない': [''],
'がされない': [''],
'されない': [''],
'たされない': ['', '', ''],
'なされない': [''],
'ばされない': [''],
'まされない': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative imperative form',
pattern: AllomorphPattern(
patterns: {
'うな': [''],
'くな': [''],
'ぐな': [''],
'すな': [''],
'つな': [''],
'ぬな': [''],
'ぶな': [''],
'むな': [''],
'るな': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - desire form',
pattern: AllomorphPattern(
patterns: {
'きたい': [''],
'ぎたい': [''],
'したい': [''],
'ちたい': [''],
'にたい': [''],
'びたい': [''],
'みたい': [''],
'りたい': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative desire form',
pattern: AllomorphPattern(
patterns: {
'いたくない': [''],
'きたくない': [''],
'ぎたくない': [''],
'したくない': [''],
'ちたくない': [''],
'にたくない': [''],
'びたくない': [''],
'みたくない': [''],
'りたくない': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - past desire form',
pattern: AllomorphPattern(
patterns: {
'きたかった': [''],
'ぎたかった': [''],
'したかった': [''],
'ちたかった': [''],
'にたかった': [''],
'びたかった': [''],
'みたかった': [''],
'りたかった': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
LemmatizationRule(
name: 'Godan verb - negative past desire form',
pattern: AllomorphPattern(
patterns: {
'いたくなかった': [''],
'きたくなかった': [''],
'ぎたくなかった': [''],
'したくなかった': [''],
'ちたくなかった': [''],
'にたくなかった': [''],
'びたくなかった': [''],
'みたくなかった': [''],
'りたくなかった': [''],
},
type: LemmatizationRuleType.suffix,
),
validChildClasses: [WordClass.godanVerb],
wordClass: WordClass.godanVerb,
),
];

View File

@@ -0,0 +1,61 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
List<LemmatizationRule> iAdjectiveLemmatizationRules = [
LemmatizationRule.simple(
name: 'I adjective - base form',
terminal: true,
pattern: '',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - negative form',
pattern: 'くない',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - past form',
pattern: 'かった',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - negative past form',
pattern: 'くなかった',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - te-form',
pattern: 'くて',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - conditional form',
pattern: 'ければ',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - volitional form',
pattern: 'かろう',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
LemmatizationRule.simple(
name: 'I adjective - continuative form',
pattern: '',
replacement: '',
validChildClasses: [WordClass.iAdjective],
wordClass: WordClass.iAdjective,
),
];

View File

@@ -0,0 +1,241 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
import 'package:jadb/util/text_filtering.dart';
List<Pattern> lookBehinds = [
kanjiRegex,
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
'',
];
List<LemmatizationRule> ichidanVerbLemmatizationRules = [
LemmatizationRule.simple(
name: 'Ichidan verb - base form',
terminal: true,
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative form',
pattern: 'ない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - past form',
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - te-form',
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - te-form with いる',
pattern: 'ている',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - te-form with いた',
pattern: 'ていた',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - conditional form',
pattern: 'れば',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - volitional form',
pattern: 'よう',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - potential form',
pattern: 'られる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - passive form',
pattern: 'られる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - causative form',
pattern: 'させる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - causative passive form',
pattern: 'させられる',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - imperative form',
pattern: '',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative past form',
pattern: 'なかった',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative te-form',
pattern: 'なくて',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative conditional form',
pattern: 'なければ',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative volitional form',
pattern: 'なかろう',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative potential form',
pattern: 'られない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative passive form',
pattern: 'られない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative causative form',
pattern: 'させない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative causative passive form',
pattern: 'させられない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative imperative form',
pattern: 'るな',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - desire form',
pattern: 'たい',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative desire form',
pattern: 'たくない',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - past desire form',
pattern: 'たかった',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
LemmatizationRule.simple(
name: 'Ichidan verb - negative past desire form',
pattern: 'たくなかった',
replacement: '',
lookAheadBehind: lookBehinds,
validChildClasses: [WordClass.ichidanVerb],
wordClass: WordClass.ichidanVerb,
),
];

View File

@@ -481,9 +481,9 @@ const Map<String, String> latin_to_hiragana = {
'#~': '',
};
bool _smallTsu(String for_conversion) => for_conversion == hiragana_small_tsu;
bool _nFollowedByYuYeYo(String for_conversion, String kana) =>
for_conversion == hiragana_syllabic_n &&
bool _smallTsu(String forConversion) => forConversion == hiragana_small_tsu;
bool _nFollowedByYuYeYo(String forConversion, String kana) =>
forConversion == hiragana_syllabic_n &&
kana.length > 1 &&
'やゆよ'.contains(kana.substring(1, 2));
@@ -495,17 +495,17 @@ String transliterateHiraganaToLatin(String hiragana) {
while (kana.isNotEmpty) {
final lengths = [if (kana.length > 1) 2, 1];
for (final length in lengths) {
final String for_conversion = kana.substring(0, length);
final String forConversion = kana.substring(0, length);
String? mora;
if (_smallTsu(for_conversion)) {
if (_smallTsu(forConversion)) {
geminate = true;
kana = kana.replaceRange(0, length, '');
break;
} else if (_nFollowedByYuYeYo(for_conversion, kana)) {
} else if (_nFollowedByYuYeYo(forConversion, kana)) {
mora = "n'";
}
mora ??= hiragana_to_latin[for_conversion];
mora ??= hiragana_to_latin[forConversion];
if (mora != null) {
if (geminate) {
@@ -516,7 +516,7 @@ String transliterateHiraganaToLatin(String hiragana) {
kana = kana.replaceRange(0, length, '');
break;
} else if (length == 1) {
romaji += for_conversion;
romaji += forConversion;
kana = kana.replaceRange(0, length, '');
}
}
@@ -524,48 +524,46 @@ String transliterateHiraganaToLatin(String hiragana) {
return romaji;
}
bool _doubleNFollowedByAIUEO(String for_conversion) =>
RegExp(r'^nn[aiueo]$').hasMatch(for_conversion);
bool _hasTableMatch(String for_conversion) =>
latin_to_hiragana[for_conversion] != null;
bool _hasDoubleConsonant(String for_conversion, int length) =>
for_conversion == 'tch' ||
bool _doubleNFollowedByAIUEO(String forConversion) =>
RegExp(r'^nn[aiueo]$').hasMatch(forConversion);
bool _hasTableMatch(String forConversion) =>
latin_to_hiragana[forConversion] != null;
bool _hasDoubleConsonant(String forConversion, int length) =>
forConversion == 'tch' ||
(length == 2 &&
RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(for_conversion));
RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(forConversion));
String transliterateLatinToHiragana(String latin) {
String romaji =
latin.toLowerCase().replaceAll('mb', 'nb').replaceAll('mp', 'np');
String romaji = latin
.toLowerCase()
.replaceAll('mb', 'nb')
.replaceAll('mp', 'np');
String kana = '';
while (romaji.isNotEmpty) {
final lengths = [
if (romaji.length > 2) 3,
if (romaji.length > 1) 2,
1,
];
final lengths = [if (romaji.length > 2) 3, if (romaji.length > 1) 2, 1];
for (final length in lengths) {
String? mora;
int for_removal = length;
final String for_conversion = romaji.substring(0, length);
int forRemoval = length;
final String forConversion = romaji.substring(0, length);
if (_doubleNFollowedByAIUEO(for_conversion)) {
if (_doubleNFollowedByAIUEO(forConversion)) {
mora = hiragana_syllabic_n;
for_removal = 1;
} else if (_hasTableMatch(for_conversion)) {
mora = latin_to_hiragana[for_conversion];
} else if (_hasDoubleConsonant(for_conversion, length)) {
forRemoval = 1;
} else if (_hasTableMatch(forConversion)) {
mora = latin_to_hiragana[forConversion];
} else if (_hasDoubleConsonant(forConversion, length)) {
mora = hiragana_small_tsu;
for_removal = 1;
forRemoval = 1;
}
if (mora != null) {
kana += mora;
romaji = romaji.replaceRange(0, for_removal, '');
romaji = romaji.replaceRange(0, forRemoval, '');
break;
} else if (length == 1) {
kana += for_conversion;
kana += forConversion;
romaji = romaji.replaceRange(0, 1, '');
}
}
@@ -579,11 +577,11 @@ String _transposeCodepointsInRange(
int distance,
int rangeStart,
int rangeEnd,
) =>
String.fromCharCodes(
text.codeUnits
.map((c) => c + ((rangeStart <= c && c <= rangeEnd) ? distance : 0)),
);
) => String.fromCharCodes(
text.codeUnits.map(
(c) => c + ((rangeStart <= c && c <= rangeEnd) ? distance : 0),
),
);
String transliterateKanaToLatin(String kana) =>
transliterateHiraganaToLatin(transliterateKatakanaToHiragana(kana));
@@ -599,12 +597,7 @@ String transliterateHiraganaToKatakana(String hiragana) =>
String transliterateFullwidthRomajiToHalfwidth(String halfwidth) =>
_transposeCodepointsInRange(
_transposeCodepointsInRange(
halfwidth,
-65248,
65281,
65374,
),
_transposeCodepointsInRange(halfwidth, -65248, 65281, 65374),
-12256,
12288,
12288,
@@ -612,12 +605,7 @@ String transliterateFullwidthRomajiToHalfwidth(String halfwidth) =>
String transliterateHalfwidthRomajiToFullwidth(String halfwidth) =>
_transposeCodepointsInRange(
_transposeCodepointsInRange(
halfwidth,
65248,
33,
126,
),
_transposeCodepointsInRange(halfwidth, 65248, 33, 126),
12256,
32,
32,

View File

@@ -1,3 +1,3 @@
String escapeStringValue(String value) {
return "'" + value.replaceAll("'", "''") + "'";
return "'${value.replaceAll("'", "''")}'";
}

View File

@@ -4,7 +4,7 @@
/// See https://www.regular-expressions.info/unicode.html
///
/// Remember to turn on the unicode flag when making a new RegExp.
const String rawKanjiRegex = r'\p{Script=Hani}';
const String rawCJKRegex = r'\p{Script=Hani}';
/// The string version of a regex that will match any katakana.
/// This includes the ranges (), ()
@@ -22,7 +22,24 @@ const String rawKatakanaRegex = r'\p{Script=Katakana}';
/// Remember to turn on the unicode flag when making a new RegExp.
const String rawHiraganaRegex = r'\p{Script=Hiragana}';
/// The string version of a regex that will match any kanji.
/// This includes the ranges (), ()
///
/// See https://www.regular-expressions.info/unicode.html
///
/// Remember to turn on the unicode flag when making a new RegExp.
const String rawKanjiRegex = r'[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]';
final RegExp kanjiRegex = RegExp(rawKanjiRegex, unicode: true);
final RegExp cjkRegex = RegExp(rawCJKRegex, unicode: true);
final RegExp katakanaRegex = RegExp(rawKatakanaRegex, unicode: true);
final RegExp hiraganaRegex = RegExp(rawHiraganaRegex, unicode: true);
final RegExp kanjiRegex = RegExp(rawKanjiRegex, unicode: true);
List<String> filterKanjiSuggestions(String string) {
return kanjiRegex
.allMatches(string)
.map((match) => match.group(0))
.where((element) => element != null)
.map((element) => element!)
.toList();
}

View File

@@ -33,39 +33,40 @@ CREATE TABLE "JMdict_InfoReading" (
-- not implement a check for it.
CREATE TABLE "JMdict_Entry" (
"id" INTEGER PRIMARY KEY
"entryId" INTEGER PRIMARY KEY
);
-- KanjiElement
CREATE TABLE "JMdict_KanjiElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"elementId" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
PRIMARY KEY ("entryId", "reading")
UNIQUE("entryId", "reading"),
UNIQUE("entryId", "orderNum")
) WITHOUT ROWID;
CREATE INDEX "JMdict_KanjiElement_byEntryId_byOrderNum" ON "JMdict_KanjiElement"("entryId", "orderNum");
CREATE INDEX "JMdict_KanjiElement_byReading" ON "JMdict_KanjiElement"("reading");
CREATE TABLE "JMdict_KanjiElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"elementId" INTEGER NOT NULL REFERENCES "JMdict_KanjiElement"("elementId"),
"info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
PRIMARY KEY ("elementId", "info")
) WITHOUT ROWID;
-- ReadingElement
CREATE TABLE "JMdict_ReadingElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"elementId" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
@@ -73,55 +74,51 @@ CREATE TABLE "JMdict_ReadingElement" (
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER CHECK ("nf" BETWEEN 1 AND 48),
PRIMARY KEY ("entryId", "reading")
UNIQUE("entryId", "reading"),
UNIQUE("entryId", "orderNum")
) WITHOUT ROWID;
CREATE INDEX "JMdict_ReadingElement_byEntryId_byOrderNum" ON "JMdict_ReadingElement"("entryId", "orderNum");
CREATE INDEX "JMdict_ReadingElement_byReading" ON "JMdict_ReadingElement"("reading");
CREATE TABLE "JMdict_ReadingElementRestriction" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"),
"restriction" TEXT NOT NULL,
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "restriction")
PRIMARY KEY ("elementId", "restriction")
) WITHOUT ROWID;
CREATE TABLE "JMdict_ReadingElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"elementId" INTEGER NOT NULL REFERENCES "JMdict_ReadingElement"("elementId"),
"info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
PRIMARY KEY ("elementId", "info")
) WITHOUT ROWID;
-- Sense
CREATE TABLE "JMdict_Sense" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"entryId" INTEGER REFERENCES "JMdict_Entry"("id"),
"orderNum" INTEGER,
"senseId" INTEGER PRIMARY KEY,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"orderNum" INTEGER NOT NULL,
UNIQUE("entryId", "orderNum")
);
CREATE INDEX "JMdict_Sense_byEntryId_byOrderNum" ON "JMdict_Sense"("entryId", "orderNum");
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"kanji" TEXT,
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"kanji" TEXT NOT NULL,
FOREIGN KEY ("entryId", "kanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "kanji")
);
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseRestrictedToReading" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"reading" TEXT,
"entryId" INTEGER NOT NULL,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"reading" TEXT NOT NULL,
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "reading")
);
) WITHOUT ROWID;
-- In order to add xrefs, you will need to have added the entry to xref to.
-- These should be added in a second pass of the dictionary file.
@@ -134,37 +131,33 @@ CREATE TABLE "JMdict_SenseRestrictedToReading" (
-- These two things also concern "SenseAntonym"
CREATE TABLE "JMdict_SenseSeeAlso" (
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"xrefEntryId" INTEGER NOT NULL,
"seeAlsoReading" TEXT,
"seeAlsoKanji" TEXT,
"seeAlsoSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN,
CHECK ("seeAlsoReading" = NULL <> "seeAlsoKanji" = NULL),
-- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
-- TODO: Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
);
CREATE TABLE "JMdict_SenseAntonym" (
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"xrefEntryId" INTEGER,
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"xrefEntryId" INTEGER NOT NULL,
"antonymReading" TEXT,
"antonymKanji" TEXT,
"antonymSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
"ambiguous" BOOLEAN,
CHECK ("antonymReading" = NULL <> "antonymKanji" = NULL),
"ambiguous" BOOLEAN NOT NULL DEFAULT FALSE,
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymSense") REFERENCES "JMdict_Sense"("entryId", "orderNum"),
PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
);
-- These cross references are going to be mostly accessed from a sense
@@ -173,7 +166,7 @@ CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId")
CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
CREATE TABLE "JMdict_SensePOS" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
PRIMARY KEY ("senseId", "pos")
) WITHOUT ROWID;
@@ -181,28 +174,28 @@ CREATE TABLE "JMdict_SensePOS" (
CREATE TABLE "JMdict_SenseField" (
"senseId" INTEGER NOT NULL,
"field" TEXT NOT NULL,
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("senseId"),
FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
PRIMARY KEY ("senseId", "field")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseMisc" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
PRIMARY KEY ("senseId", "misc")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseLanguageSource" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"language" CHAR(3) NOT NULL DEFAULT "eng",
"phrase" TEXT,
"fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
"constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
PRIMARY KEY ("senseId", "language", "phrase")
UNIQUE("senseId", "language", "phrase")
);
CREATE TABLE "JMdict_SenseDialect" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
PRIMARY KEY ("senseId", "dialect")
) WITHOUT ROWID;
@@ -213,7 +206,7 @@ CREATE TABLE "JMdict_SenseDialect" (
-- will be omitted.
CREATE TABLE "JMdict_SenseGlossary" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"phrase" TEXT NOT NULL,
"language" CHAR(3) NOT NULL DEFAULT "eng",
"type" TEXT,
@@ -223,7 +216,7 @@ CREATE TABLE "JMdict_SenseGlossary" (
CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
CREATE TABLE "JMdict_SenseInfo" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"info" TEXT NOT NULL,
PRIMARY KEY ("senseId", "info")
) WITHOUT ROWID;
@@ -232,8 +225,7 @@ CREATE TABLE "JMdict_SenseInfo" (
-- the Tanaka Corpus, so I will leave the type out for now.
CREATE TABLE "JMdict_ExampleSentence" (
"id" INTEGER PRIMARY KEY,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"word" TEXT NOT NULL,
"source" TEXT NOT NULL,
"sourceLanguage" CHAR(3) NOT NULL DEFAULT "eng",

View File

@@ -0,0 +1,55 @@
CREATE VIRTUAL TABLE "JMdict_KanjiElementFTS" USING FTS5("elementId" UNINDEXED, "reading");
CREATE TRIGGER "JMdict_KanjiElement_InsertFTS"
AFTER INSERT ON "JMdict_KanjiElement"
BEGIN
INSERT INTO "JMdict_KanjiElementFTS"("elementId", "reading")
VALUES (NEW."elementId", NEW."reading");
END;
CREATE TRIGGER "JMdict_KanjiElement_UpdateFTS"
AFTER UPDATE OF "elementId", "reading"
ON "JMdict_KanjiElement"
BEGIN
UPDATE "JMdict_KanjiElementFTS"
SET
"elementId" = NEW."elementId",
"reading" = NEW."reading"
WHERE "elementId" = OLD."elementId";
END;
CREATE TRIGGER "JMdict_KanjiElement_DeleteFTS"
AFTER DELETE ON "JMdict_KanjiElement"
BEGIN
DELETE FROM "JMdict_KanjiElementFTS"
WHERE "elementId" = OLD."elementId";
END;
CREATE VIRTUAL TABLE "JMdict_ReadingElementFTS" USING FTS5("elementId" UNINDEXED, "reading");
CREATE TRIGGER "JMdict_ReadingElement_InsertFTS"
AFTER INSERT ON "JMdict_ReadingElement"
BEGIN
INSERT INTO "JMdict_ReadingElementFTS"("elementId", "reading")
VALUES (NEW."elementId", NEW."reading");
END;
CREATE TRIGGER "JMdict_ReadingElement_UpdateFTS"
AFTER UPDATE OF "elementId", "reading"
ON "JMdict_ReadingElement"
BEGIN
UPDATE "JMdict_ReadingElementFTS"
SET
"elementId" = NEW."elementId",
"reading" = NEW."reading"
WHERE "elementId" = OLD."elementId";
END;
CREATE TRIGGER "JMdict_ReadingElement_DeleteFTS"
AFTER DELETE ON "JMdict_ReadingElement"
BEGIN
DELETE FROM "JMdict_ReadingElementFTS"
WHERE "elementId" = OLD."elementId";
END;

View File

@@ -2,7 +2,7 @@ CREATE TABLE "JMdict_JLPTTag" (
"entryId" INTEGER NOT NULL,
"jlptLevel" CHAR(2) NOT NULL CHECK ("jlptLevel" in ('N5', 'N4', 'N3', 'N2', 'N1')),
FOREIGN KEY ("entryId")
REFERENCES "JMdict_Entry"("id"),
REFERENCES "JMdict_Entry"("entryId"),
PRIMARY KEY ("entryId", "jlptLevel")
) WITHOUT ROWID;

View File

@@ -0,0 +1,207 @@
CREATE TABLE "JMdict_EntryScore" (
"type" CHAR(1) NOT NULL CHECK ("type" IN ('r', 'k')),
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("entryId"),
"elementId" INTEGER NOT NULL,
"score" INTEGER NOT NULL DEFAULT 0,
"common" BOOLEAN NOT NULL DEFAULT FALSE,
PRIMARY KEY ("type", "elementId")
) WITHOUT ROWID;
CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score");
CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score");
CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common");
CREATE INDEX "JMdict_EntryScore_byType_byElementId_byScore" ON "JMdict_EntryScore"("type", "elementId", "score");
CREATE INDEX "JMdict_EntryScore_byType_byScore" ON "JMdict_EntryScore"("type", "score");
CREATE INDEX "JMdict_EntryScore_byType_byCommon" ON "JMdict_EntryScore"("type", "common");
-- NOTE: these views are deduplicated in order not to perform an unnecessary
-- UNION on every trigger
CREATE VIEW "JMdict_EntryScoreView_Reading" AS
SELECT
'r' AS "type",
"JMdict_ReadingElement"."entryId",
"JMdict_ReadingElement"."elementId",
(
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
)
AS "common",
((
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
) * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_ReadingElement"
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
CREATE VIEW "JMdict_EntryScoreView_Kanji" AS
SELECT
'k' AS "type",
"JMdict_KanjiElement"."entryId",
"JMdict_KanjiElement"."elementId",
(
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
)
AS "common",
((
"news" IS 1
OR "ichi" IS 1
OR "spec" IS 1
OR "gai" IS 1
) * 50)
+ (("news" IS 1) * 10)
+ (("news" IS 2) * 5)
+ (("ichi" IS 1) * 10)
+ (("ichi" IS 2) * 5)
+ (("spec" IS 1) * 10)
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 1) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_KanjiElement"
LEFT JOIN "JMdict_JLPTTag" USING ("entryId");
CREATE VIEW "JMdict_EntryScoreView" AS
SELECT *
FROM "JMdict_EntryScoreView_Kanji"
UNION ALL
SELECT *
FROM "JMdict_EntryScoreView_Reading";
--- JMdict_ReadingElement triggers
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_ReadingElement"
AFTER INSERT ON "JMdict_ReadingElement"
BEGIN
INSERT INTO "JMdict_EntryScore" (
"type",
"entryId",
"elementId",
"score",
"common"
)
SELECT "type", "entryId", "elementId", "score", "common"
FROM "JMdict_EntryScoreView_Reading"
WHERE "elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
ON "JMdict_ReadingElement"
BEGIN
UPDATE "JMdict_EntryScore"
SET
"score" = "JMdict_EntryScoreView_Reading"."score",
"common" = "JMdict_EntryScoreView_Reading"."common"
FROM "JMdict_EntryScoreView_Reading"
WHERE "elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement"
AFTER DELETE ON "JMdict_ReadingElement"
BEGIN
DELETE FROM "JMdict_EntryScore"
WHERE "type" = 'r'
AND "elementId" = OLD."elementId";
END;
--- JMdict_KanjiElement triggers
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_KanjiElement"
AFTER INSERT ON "JMdict_KanjiElement"
BEGIN
INSERT INTO "JMdict_EntryScore" (
"type",
"entryId",
"elementId",
"score",
"common"
)
SELECT "type", "entryId", "elementId", "score", "common"
FROM "JMdict_EntryScoreView_Kanji"
WHERE "elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
AFTER UPDATE OF "news", "ichi", "spec", "gai", "nf", "orderNum"
ON "JMdict_KanjiElement"
BEGIN
UPDATE "JMdict_EntryScore"
SET
"score" = "JMdict_EntryScoreView_Kanji"."score",
"common" = "JMdict_EntryScoreView_Kanji"."common"
FROM "JMdict_EntryScoreView_Kanji"
WHERE "elementId" = NEW."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement"
AFTER DELETE ON "JMdict_KanjiElement"
BEGIN
DELETE FROM "JMdict_EntryScore"
WHERE "type" = 'k'
AND "elementId" = OLD."elementId";
END;
--- JMdict_JLPTTag triggers
CREATE TRIGGER "JMdict_EntryScore_Insert_JMdict_JLPTTag"
AFTER INSERT ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_EntryScore"
SET
"score" = "JMdict_EntryScoreView"."score",
"common" = "JMdict_EntryScoreView"."common"
FROM "JMdict_EntryScoreView"
WHERE "JMdict_EntryScoreView"."entryId" = NEW."entryId"
AND "JMdict_EntryScore"."entryId" = NEW."entryId"
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_JLPTTag"
AFTER UPDATE OF "jlptLevel"
ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_EntryScore"
SET
"score" = "JMdict_EntryScoreView"."score",
"common" = "JMdict_EntryScoreView"."common"
FROM "JMdict_EntryScoreView"
WHERE "JMdict_EntryScoreView"."entryId" = NEW."entryId"
AND "JMdict_EntryScore"."entryId" = NEW."entryId"
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_JLPTTag"
AFTER DELETE ON "JMdict_JLPTTag"
BEGIN
UPDATE "JMdict_EntryScore"
SET
"score" = "JMdict_EntryScoreView"."score",
"common" = "JMdict_EntryScoreView"."common"
FROM "JMdict_EntryScoreView"
WHERE "JMdict_EntryScoreView"."entryId" = OLD."entryId"
AND "JMdict_EntryScore"."entryId" = OLD."entryId"
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
END;

View File

@@ -6,6 +6,3 @@ CREATE TABLE "RADKFILE" (
CREATE INDEX "RADK" ON "RADKFILE"("radical");
CREATE INDEX "KRAD" ON "RADKFILE"("kanji");
CREATE VIEW "RADKFILE_Radicals" AS
SELECT DISTINCT "radical" FROM "RADKFILE";

View File

@@ -1,22 +0,0 @@
-- These tables are for optimizing searches.
-- In order to include results from both, the software should
-- first check if the searchword is convertible to kana, and then
-- potentially get results from both by doing a union between two
-- selects.
CREATE TABLE "JMdict_EntryByKana" (
"kana" TEXT NOT NULL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
PRIMARY KEY ("kana", "entryId")
) WITHOUT ROWID;
CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana");
CREATE TABLE "JMdict_EntryByEnglish" (
"english" TEXT NOT NULL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
PRIMARY KEY ("english", "entryId")
) WITHOUT ROWID;
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");

View File

@@ -6,12 +6,6 @@ CREATE TABLE "KANJIDIC_Character" (
"jlpt" INTEGER
) WITHOUT ROWID;
CREATE TABLE "KANJIDIC_RadicalName" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"name" TEXT NOT NULL,
PRIMARY KEY("kanji", "name")
) WITHOUT ROWID;
CREATE TABLE "KANJIDIC_Codepoint" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')),
@@ -22,12 +16,25 @@ CREATE TABLE "KANJIDIC_Codepoint" (
CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji");
CREATE TABLE "KANJIDIC_Radical" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')),
"radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 213)),
PRIMARY KEY("kanji", "type")
"kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"),
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214)
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Radical_byRadicalId" ON "KANJIDIC_Radical"("radicalId");
CREATE TABLE "KANJIDIC_RadicalNelsonCId" (
"radicalId" INTEGER NOT NULL PRIMARY KEY CHECK ("radicalId" BETWEEN 1 AND 214),
"nelsonId" INTEGER UNIQUE NOT NULL CHECK ("nelsonId" BETWEEN 1 AND 213)
);
CREATE TABLE "KANJIDIC_RadicalName" (
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214),
"name" TEXT NOT NULL,
PRIMARY KEY("radicalId", "name")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_RadicalName_byRadicalId" ON "KANJIDIC_RadicalName"("radicalId");
CREATE TABLE "KANJIDIC_StrokeMiscount" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"strokeCount" INTEGER NOT NULL,
@@ -106,6 +113,7 @@ CREATE TABLE "KANJIDIC_QueryCode" (
"code" VARCHAR(7) NOT NULL,
"type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')),
"SKIPMisclassification" VARCHAR(15),
CHECK ("SKIPMisclassification" IS NULL OR "type" = 'skip'),
PRIMARY KEY ("kanji", "type", "code")
) WITHOUT ROWID;
@@ -120,30 +128,39 @@ CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading");
CREATE TABLE "KANJIDIC_Kunyomi" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"orderNum" INTEGER NOT NULL,
"yomi" TEXT NOT NULL,
"isJouyou" BOOLEAN,
UNIQUE("kanji", "orderNum"),
PRIMARY KEY ("kanji", "yomi")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Kunyomi_byKanji_byOrderNum" ON "KANJIDIC_Kunyomi"("kanji", "orderNum");
CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi");
CREATE TABLE "KANJIDIC_Onyomi" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"orderNum" INTEGER NOT NULL,
"yomi" TEXT NOT NULL,
"type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')),
"isJouyou" BOOLEAN,
UNIQUE("kanji", "orderNum"),
PRIMARY KEY ("kanji", "yomi")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Onyomi_byKanji_byOrderNum" ON "KANJIDIC_Onyomi"("kanji", "orderNum");
CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi");
CREATE TABLE "KANJIDIC_Meaning" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"orderNum" INTEGER NOT NULL,
"language" CHAR(3) NOT NULL DEFAULT "eng",
"meaning" TEXT NOT NULL,
UNIQUE("kanji", "orderNum"),
PRIMARY KEY ("kanji", "language", "meaning")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Meaning_byKanji_byOrderNum" ON "KANJIDIC_Meaning"("kanji", "orderNum");
CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning");
CREATE TABLE "KANJIDIC_Nanori" (

View File

@@ -1,13 +1,11 @@
CREATE TABLE "XREF__KANJIDIC_Radical__RADKFILE"(
"radicalId" INTEGER NOT NULL,
"radicalSymbol" CHAR(1) NOT NULL REFERENCES "RADKFILE"("radical"),
"radicalType" VARCHAR(9) NOT NULL CHECK ("radicalType" IN ('classical', 'nelson_c')) DEFAULT 'classical',
PRIMARY KEY ("radicalId", "radicalSymbol", "radicalType"),
FOREIGN KEY ("radicalId", "radicalType") REFERENCES "KANJIDIC_Radical"("radical", "type")
"radicalId" INTEGER NOT NULL CHECK ("radicalId" BETWEEN 1 AND 214),
"radicalSymbol" CHAR(1) UNIQUE NOT NULL REFERENCES "RADKFILE"("radical"),
PRIMARY KEY ("radicalId", "radicalSymbol")
) WITHOUT ROWID;
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalId" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalId");
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol_byRadicalType" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol", "radicalType");
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol");
/* Source: https://ctext.org/kangxi-zidian */
INSERT INTO "XREF__KANJIDIC_Radical__RADKFILE"("radicalId", "radicalSymbol") VALUES

89
migrations/0010_Views.sql Normal file
View File

@@ -0,0 +1,89 @@
CREATE VIEW "JMdict_EntryByKana"("kana", "entryId")
AS
SELECT
"JMdict_ReadingElement"."reading" AS "kana",
"JMdict_ReadingElement"."entryId" AS "entryId"
FROM "JMdict_ReadingElement";
CREATE VIEW "JMdict_EntryByEnglish"("english", "entryId")
AS
SELECT
"JMdict_SenseGlossary"."phrase" AS "english",
"JMdict_Sense"."senseId" AS "entryId"
FROM "JMdict_SenseGlossary" JOIN "JMdict_Sense" USING("senseId");
CREATE VIEW "JMdict_BaseAndFurigana"("entryId", "base", "furigana", "isFirst", "kanjiOrderNum", "readingOrderNum")
AS
SELECT
"JMdict_Entry"."entryId" AS "entryId",
CASE WHEN (
"JMdict_KanjiElement"."reading" IS NOT NULL
AND NOT "JMdict_ReadingElement"."readingDoesNotMatchKanji"
)
THEN "JMdict_KanjiElement"."reading"
ELSE "JMdict_ReadingElement"."reading"
END AS "base",
CASE WHEN (
"JMdict_KanjiElement"."reading" IS NOT NULL
AND NOT "JMdict_ReadingElement"."readingDoesNotMatchKanji"
)
THEN "JMdict_ReadingElement"."reading"
ELSE NULL
END AS "furigana",
COALESCE("JMdict_KanjiElement"."orderNum", 1)
+ "JMdict_ReadingElement"."orderNum"
= 2
AS "isFirst",
"JMdict_KanjiElement"."orderNum" AS "kanjiOrderNum",
"JMdict_ReadingElement"."orderNum" AS "readingOrderNum"
FROM "JMdict_Entry"
LEFT JOIN "JMdict_KanjiElement" USING("entryId")
LEFT JOIN "JMdict_ReadingElement" USING("entryId");
CREATE VIEW "JMdict_EntryCommon"("entryId")
AS
SELECT DISTINCT "entryId"
FROM "JMdict_EntryScore"
WHERE "JMdict_EntryScore"."common" = 1;
-- TODO: Make it possible to match words that contain the
-- kanji as an infix
CREATE VIEW "KANJIDIC_ExampleEntries"("kanji", "entryId")
AS
SELECT
"JMdict_KanjiElement"."entryId",
"KANJIDIC_Character"."literal" AS "kanji",
"JMdict_KanjiElement"."reading"
FROM
"KANJIDIC_Character"
JOIN "JMdict_KanjiElementFTS"
ON "JMdict_KanjiElementFTS"."reading" MATCH "KANJIDIC_Character"."literal" || '*'
JOIN "JMdict_KanjiElement"
ON "JMdict_KanjiElementFTS"."entryId" = "JMdict_KanjiElement"."entryId"
AND "JMdict_KanjiElementFTS"."reading" LIKE '%' || "JMdict_KanjiElement"."reading"
JOIN "JMdict_EntryScore"
ON "JMdict_EntryScore"."type" = 'k'
AND "JMdict_KanjiElement"."entryId" = "JMdict_EntryScore"."entryId"
AND "JMdict_KanjiElement"."reading" = "JMdict_EntryScore"."reading"
WHERE "JMdict_EntryScore"."common" = 1;
CREATE VIEW "RADKFILE_Radicals" AS
SELECT DISTINCT "radical" FROM "RADKFILE";
CREATE VIEW "JMdict_CombinedEntryScore"
AS
SELECT
CASE
WHEN "JMdict_EntryScore"."type" = 'k'
THEN (SELECT entryId FROM "JMdict_KanjiElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
WHEN "JMdict_EntryScore"."type" = 'r'
THEN (SELECT entryId FROM "JMdict_ReadingElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
END AS "entryId",
MAX("JMdict_EntryScore"."score") AS "score",
MAX("JMdict_EntryScore"."common") AS "common"
FROM "JMdict_EntryScore"
GROUP BY "entryId";

View File

@@ -7,6 +7,7 @@
radkfile,
kanjidic2,
sqlite,
wal ? false,
}:
stdenvNoCC.mkDerivation {
name = "jadb";
@@ -16,7 +17,7 @@ stdenvNoCC.mkDerivation {
database-tool
sqlite
];
buildPhase = ''
runHook preBuild
@@ -29,7 +30,9 @@ stdenvNoCC.mkDerivation {
sqlite3 jadb.sqlite < "$migration"
done
"${lib.getExe database-tool}" create-db --libsqlite "${sqlite.out}/lib/libsqlite3.so"
"${lib.getExe database-tool}" create-db \
${lib.optionalString wal "--wal"} \
--libsqlite "${sqlite.out}/lib/libsqlite3.so"
runHook postBuild
'';

View File

@@ -10,6 +10,7 @@
stdenvNoCC.mkDerivation {
name = "docs";
src = database;
nativeBuildInputs = [
sqlite
schemaspy

View File

@@ -9,16 +9,17 @@
stdenvNoCC.mkDerivation {
name = "jmdict";
dontUnpack = true;
srcs = [
jmdict-src
jmdict-with-examples-src
];
dontUnpack = true;
nativeBuildInputs = [
gzip
xmlformat
];
buildPhase = ''
runHook preBuild

View File

@@ -15,6 +15,7 @@ stdenvNoCC.mkDerivation {
gzip
xmlformat
];
buildPhase = ''
runHook preBuild

View File

@@ -15,6 +15,7 @@ stdenv.mkDerivation {
gzip
iconv
];
buildPhase = ''
runHook preBuild

View File

@@ -5,18 +5,18 @@ packages:
dependency: transitive
description:
name: _fe_analyzer_shared
sha256: e55636ed79578b9abca5fecf9437947798f5ef7456308b5cb85720b793eac92f
sha256: da0d9209ca76bde579f2da330aeb9df62b6319c834fa7baae052021b0462401f
url: "https://pub.dev"
source: hosted
version: "82.0.0"
version: "85.0.0"
analyzer:
dependency: transitive
description:
name: analyzer
sha256: "13c1e6c6fd460522ea840abec3f677cc226f5fec7872c04ad7b425517ccf54f7"
sha256: b1ade5707ab7a90dfd519eaac78a7184341d19adb6096c68d499b59c7c6cf880
url: "https://pub.dev"
source: hosted
version: "7.4.4"
version: "7.7.0"
args:
dependency: "direct main"
description:
@@ -69,10 +69,10 @@ packages:
dependency: transitive
description:
name: coverage
sha256: "9086475ef2da7102a0c0a4e37e1e30707e7fb7b6d28c209f559a9c5f8ce42016"
sha256: "5da775aa218eaf2151c721b16c01c7676fbfdd99cebba2bf64e8b807a28ff94d"
url: "https://pub.dev"
source: hosted
version: "1.12.0"
version: "1.15.0"
crypto:
dependency: transitive
description:
@@ -165,10 +165,10 @@ packages:
dependency: "direct dev"
description:
name: lints
sha256: c35bb79562d980e9a453fc715854e1ed39e24e7d0297a880ef54e17f9874a9d7
sha256: a5e2b223cb7c9c8efdc663ef484fdd95bb243bff242ef5b13e26883547fce9a0
url: "https://pub.dev"
source: hosted
version: "5.1.1"
version: "6.0.0"
logging:
dependency: transitive
description:
@@ -189,10 +189,10 @@ packages:
dependency: transitive
description:
name: meta
sha256: e3641ec5d63ebf0d9b41bd43201a66e3fc79a65db5f61fc181f04cd27aab950c
sha256: "23f08335362185a5ea2ad3a4e597f1375e78bce8a040df5c600c8d3552ef2394"
url: "https://pub.dev"
source: hosted
version: "1.16.0"
version: "1.17.0"
mime:
dependency: transitive
description:
@@ -218,7 +218,7 @@ packages:
source: hosted
version: "2.2.0"
path:
dependency: transitive
dependency: "direct main"
description:
name: path
sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5"
@@ -229,10 +229,10 @@ packages:
dependency: transitive
description:
name: petitparser
sha256: "07c8f0b1913bcde1ff0d26e57ace2f3012ccbf2b204e070290dad3bb22797646"
sha256: "9436fe11f82d7cc1642a8671e5aa4149ffa9ae9116e6cf6dd665fc0653e3825c"
url: "https://pub.dev"
source: hosted
version: "6.1.0"
version: "7.0.0"
pool:
dependency: transitive
description:
@@ -317,18 +317,18 @@ packages:
dependency: "direct main"
description:
name: sqflite_common_ffi
sha256: "1f3ef3888d3bfbb47785cc1dda0dc7dd7ebd8c1955d32a9e8e9dae1e38d1c4c1"
sha256: "9faa2fedc5385ef238ce772589f7718c24cdddd27419b609bb9c6f703ea27988"
url: "https://pub.dev"
source: hosted
version: "2.3.5"
version: "2.3.6"
sqlite3:
dependency: transitive
dependency: "direct main"
description:
name: sqlite3
sha256: "310af39c40dd0bb2058538333c9d9840a2725ae0b9f77e4fd09ad6696aa8f66e"
sha256: "608b56d594e4c8498c972c8f1507209f9fd74939971b948ddbbfbfd1c9cb3c15"
url: "https://pub.dev"
source: hosted
version: "2.7.5"
version: "2.7.7"
stack_trace:
dependency: transitive
description:
@@ -357,10 +357,10 @@ packages:
dependency: transitive
description:
name: synchronized
sha256: "0669c70faae6270521ee4f05bffd2919892d42d1276e6c495be80174b6bc0ef6"
sha256: c254ade258ec8282947a0acbbc90b9575b4f19673533ee46f2f6e9b3aeefd7c0
url: "https://pub.dev"
source: hosted
version: "3.3.1"
version: "3.4.0"
term_glyph:
dependency: transitive
description:
@@ -373,26 +373,26 @@ packages:
dependency: "direct dev"
description:
name: test
sha256: "301b213cd241ca982e9ba50266bd3f5bd1ea33f1455554c5abb85d1be0e2d87e"
sha256: "65e29d831719be0591f7b3b1a32a3cda258ec98c58c7b25f7b84241bc31215bb"
url: "https://pub.dev"
source: hosted
version: "1.25.15"
version: "1.26.2"
test_api:
dependency: transitive
description:
name: test_api
sha256: fb31f383e2ee25fbbfe06b40fe21e1e458d14080e3c67e7ba0acfde4df4e0bbd
sha256: "522f00f556e73044315fa4585ec3270f1808a4b186c936e612cab0b565ff1e00"
url: "https://pub.dev"
source: hosted
version: "0.7.4"
version: "0.7.6"
test_core:
dependency: transitive
description:
name: test_core
sha256: "84d17c3486c8dfdbe5e12a50c8ae176d15e2a771b96909a9442b40173649ccaa"
sha256: "80bf5a02b60af04b09e14f6fe68b921aad119493e26e490deaca5993fef1b05a"
url: "https://pub.dev"
source: hosted
version: "0.6.8"
version: "0.6.11"
typed_data:
dependency: transitive
description:
@@ -405,18 +405,18 @@ packages:
dependency: transitive
description:
name: vm_service
sha256: ddfa8d30d89985b96407efce8acbdd124701f96741f2d981ca860662f1c0dc02
sha256: "45caa6c5917fa127b5dbcfbd1fa60b14e583afdc08bfc96dda38886ca252eb60"
url: "https://pub.dev"
source: hosted
version: "15.0.0"
version: "15.0.2"
watcher:
dependency: transitive
description:
name: watcher
sha256: "69da27e49efa56a15f8afe8f4438c4ec02eff0a117df1b22ea4aad194fe1c104"
sha256: "0b7fd4a0bbc4b92641dbf20adfd7e3fd1398fe17102d94b674234563e110088a"
url: "https://pub.dev"
source: hosted
version: "1.1.1"
version: "1.1.2"
web:
dependency: transitive
description:
@@ -429,10 +429,10 @@ packages:
dependency: transitive
description:
name: web_socket
sha256: bfe6f435f6ec49cb6c01da1e275ae4228719e59a6b067048c51e72d9d63bcc4b
sha256: "34d64019aa8e36bf9842ac014bb5d2f5586ca73df5e4d9bf5c936975cae6982c"
url: "https://pub.dev"
source: hosted
version: "1.0.0"
version: "1.0.1"
web_socket_channel:
dependency: transitive
description:
@@ -453,10 +453,10 @@ packages:
dependency: "direct main"
description:
name: xml
sha256: b015a8ad1c488f66851d762d3090a21c600e479dc75e68328c52774040cf9226
sha256: "3202a47961c1a0af6097c9f8c1b492d705248ba309e6f7a72410422c05046851"
url: "https://pub.dev"
source: hosted
version: "6.5.0"
version: "6.6.0"
yaml:
dependency: transitive
description:
@@ -466,4 +466,4 @@ packages:
source: hosted
version: "3.1.3"
sdks:
dart: ">=3.7.0 <4.0.0"
dart: ">=3.8.0 <4.0.0"

View File

@@ -4,19 +4,21 @@ version: 1.0.0
homepage: https://git.pvv.ntnu.no/oysteikt/jadb
environment:
sdk: '>=3.0.0 <4.0.0'
sdk: '^3.8.0'
dependencies:
args: ^2.7.0
collection: ^1.19.1
collection: ^1.19.0
csv: ^6.0.0
equatable: ^2.0.7
sqflite_common: ^2.5.5
sqflite_common_ffi: ^2.3.5
equatable: ^2.0.0
path: ^1.9.1
sqflite_common: ^2.5.0
sqflite_common_ffi: ^2.3.0
sqlite3: ^2.7.7
xml: ^6.5.0
dev_dependencies:
lints: ^5.0.0
lints: ^6.0.0
test: ^1.25.15
executables:

View File

@@ -1,9 +1,9 @@
import 'package:collection/collection.dart';
import 'package:jadb/util/jouyou_kanji.dart';
import 'package:jadb/const_data/kanji_grades.dart';
import 'package:test/test.dart';
void main() {
test("Assert 2136 kanji in jouyou set", () {
expect(JOUYOU_KANJI.values.flattenedToSet.length, 2136);
test('Assert 2136 kanji in jouyou set', () {
expect(JOUYOU_KANJI_BY_GRADES.values.flattenedToSet.length, 2136);
});
}

View File

@@ -0,0 +1,33 @@
import 'dart:ffi';
import 'dart:io';
import 'package:jadb/models/create_empty_db.dart';
import 'package:jadb/search.dart';
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
import 'package:sqlite3/open.dart';
import 'package:test/test.dart';
Future<DatabaseExecutor> setup_inmemory_database() async {
final libsqlitePath = Platform.environment['LIBSQLITE_PATH'];
if (libsqlitePath == null) {
throw Exception('LIBSQLITE_PATH is not set');
}
final dbConnection = await createDatabaseFactoryFfi(
ffiInit: () =>
open.overrideForAll(() => DynamicLibrary.open(libsqlitePath)),
).openDatabase(':memory:');
return dbConnection;
}
void main() {
test('Create empty db', () async {
final db = await setup_inmemory_database();
await createEmptyDb(db);
await db.jadbVerifyTables();
});
}

View File

@@ -0,0 +1,29 @@
import 'package:jadb/search.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
void main() {
test('Filter kanji', () async {
final connection = await setup_database_connection();
final result = await connection.filterKanji([
'a',
'b',
'c',
'',
'',
'',
'',
'',
'',
'.',
'!',
'@',
';',
'',
], deduplicate: false);
expect(result.join(), '漢字地字');
});
}

View File

@@ -1,23 +1,24 @@
import 'package:jadb/util/jouyou_kanji.dart';
import 'package:jadb/const_data/kanji_grades.dart';
import 'package:jadb/search.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
void main() {
test("Search a kanji", () async {
test('Search a kanji', () async {
final connection = await setup_database_connection();
final result = await connection.searchKanji('');
final result = await connection.jadbSearchKanji('');
expect(result, isNotNull);
});
group("Search all jouyou kanji", () {
JOUYOU_KANJI.forEach((grade, characters) {
test("Search all kanji in grade $grade", () async {
group('Search all jouyou kanji', () {
JOUYOU_KANJI_BY_GRADES.forEach((grade, characters) {
test('Search all kanji in grade $grade', () async {
final connection = await setup_database_connection();
for (final character in characters) {
final result = await connection.searchKanji(character);
final result = await connection.jadbSearchKanji(character);
expect(result, isNotNull);
}
}, timeout: Timeout.factor(10));

View File

@@ -1,28 +1,24 @@
import 'dart:io';
import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:jadb/search.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<JaDBConnection> setup_database_connection() async {
final lib_sqlite_path = Platform.environment['LIBSQLITE_PATH'];
final jadb_path = Platform.environment['JADB_PATH'];
Future<Database> setup_database_connection() async {
final libSqlitePath = Platform.environment['LIBSQLITE_PATH'];
final jadbPath = Platform.environment['JADB_PATH'];
if (lib_sqlite_path == null) {
throw Exception("LIBSQLITE_PATH is not set");
if (libSqlitePath == null) {
throw Exception('LIBSQLITE_PATH is not set');
}
if (jadb_path == null) {
throw Exception("JADB_PATH is not set");
if (jadbPath == null) {
throw Exception('JADB_PATH is not set');
}
final db_connection = await openLocalDb(
libsqlitePath: lib_sqlite_path,
jadbPath: jadb_path,
final dbConnection = await openLocalDb(
libsqlitePath: libSqlitePath,
jadbPath: jadbPath,
);
if (db_connection == null) {
throw Exception("Failed to open database");
}
return JaDBConnection(db_connection);
return dbConnection;
}

View File

@@ -1,26 +1,62 @@
import 'package:jadb/search.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
void main() {
test("Search a word", () async {
test('Search a word - english - auto', () async {
final connection = await setup_database_connection();
final result = await connection.jadbSearchWord('kana');
expect(result, isNotNull);
});
final result = await connection.searchWord("kana");
test('Get word search count - english - auto', () async {
final connection = await setup_database_connection();
final result = await connection.jadbSearchWordCount('kana');
expect(result, isNotNull);
});
test('Search a word - japanese kana - auto', () async {
final connection = await setup_database_connection();
final result = await connection.jadbSearchWord('かな');
expect(result, isNotNull);
});
test('Get word search count - japanese kana - auto', () async {
final connection = await setup_database_connection();
final result = await connection.jadbSearchWordCount('かな');
expect(result, isNotNull);
});
test('Search a word - japanese kanji - auto', () async {
final connection = await setup_database_connection();
final result = await connection.jadbSearchWord('仮名');
expect(result, isNotNull);
});
test('Get word search count - japanese kanji - auto', () async {
final connection = await setup_database_connection();
final result = await connection.jadbSearchWordCount('仮名');
expect(result, isNotNull);
});
test('Get a word by id', () async {
final connection = await setup_database_connection();
final result = await connection.jadbGetWordById(1577090);
expect(result, isNotNull);
});
test(
"Serialize all words",
'Serialize all words',
() async {
final connection = await setup_database_connection();
// Test serializing all words
for (final letter in "aiueoksthnmyrw".split("")) {
await connection.searchWord(letter);
for (final letter in 'aiueoksthnmyrw'.split('')) {
await connection.jadbSearchWord(letter);
}
},
timeout: Timeout.factor(100),
skip: "Very slow test",
skip: 'Very slow test',
);
}

View File

@@ -2,65 +2,65 @@ import 'package:jadb/util/romaji_transliteration.dart';
import 'package:test/test.dart';
void main() {
group("Romaji -> Hiragana", () {
test("Basic test", () {
final result = transliterateLatinToHiragana("katamari");
expect(result, "かたまり");
group('Romaji -> Hiragana', () {
test('Basic test', () {
final result = transliterateLatinToHiragana('katamari');
expect(result, 'かたまり');
});
test("Basic test with diacritics", () {
final result = transliterateLatinToHiragana("gadamari");
expect(result, "がだまり");
test('Basic test with diacritics', () {
final result = transliterateLatinToHiragana('gadamari');
expect(result, 'がだまり');
});
test("wi and we", () {
final result = transliterateLatinToHiragana("wiwe");
expect(result, "うぃうぇ");
test('wi and we', () {
final result = transliterateLatinToHiragana('wiwe');
expect(result, 'うぃうぇ');
});
test("nb = mb", () {
final result = transliterateLatinToHiragana("kanpai");
expect(result, "かんぱい");
test('nb = mb', () {
final result = transliterateLatinToHiragana('kanpai');
expect(result, 'かんぱい');
final result2 = transliterateLatinToHiragana("kampai");
expect(result2, "かんぱい");
final result2 = transliterateLatinToHiragana('kampai');
expect(result2, 'かんぱい');
});
test("Double n", () {
final result = transliterateLatinToHiragana("konnichiha");
expect(result, "こんにちは");
test('Double n', () {
final result = transliterateLatinToHiragana('konnichiha');
expect(result, 'こんにちは');
});
test("Double consonant", () {
final result = transliterateLatinToHiragana("kappa");
expect(result, "かっぱ");
test('Double consonant', () {
final result = transliterateLatinToHiragana('kappa');
expect(result, 'かっぱ');
});
});
group("Hiragana -> Romaji", () {
test("Basic test", () {
final result = transliterateHiraganaToLatin("かたまり");
expect(result, "katamari");
group('Hiragana -> Romaji', () {
test('Basic test', () {
final result = transliterateHiraganaToLatin('かたまり');
expect(result, 'katamari');
});
test("Basic test with diacritics", () {
final result = transliterateHiraganaToLatin("がだまり");
expect(result, "gadamari");
test('Basic test with diacritics', () {
final result = transliterateHiraganaToLatin('がだまり');
expect(result, 'gadamari');
});
test("whi and whe", () {
final result = transliterateHiraganaToLatin("うぃうぇ");
expect(result, "whiwhe");
test('whi and whe', () {
final result = transliterateHiraganaToLatin('うぃうぇ');
expect(result, 'whiwhe');
});
test("Double n", () {
final result = transliterateHiraganaToLatin("こんにちは");
expect(result, "konnichiha");
test('Double n', () {
final result = transliterateHiraganaToLatin('こんにちは');
expect(result, 'konnichiha');
});
test("Double consonant", () {
final result = transliterateHiraganaToLatin("かっぱ");
expect(result, "kappa");
test('Double consonant', () {
final result = transliterateHiraganaToLatin('かっぱ');
expect(result, 'kappa');
});
});
}