9 Commits

Author SHA1 Message Date
9632b90952 search/kanji: split queries into separate functions
Some checks failed
Build database / evals (push) Successful in 12m28s
Run tests / evals (push) Failing after 28m41s
2026-02-28 18:57:57 +09:00
0b6e21af57 word_search_result: add romanization getters 2026-02-28 18:46:13 +09:00
30913ea6a1 search/word_search: fix casing of SearchMode variants 2026-02-28 18:27:54 +09:00
7731a4a599 Fix a few lints 2026-02-28 18:25:37 +09:00
8fb6baa03f README: add textual overview of the word search procedure 2026-02-28 14:52:22 +09:00
382af1add8 dart format 2026-02-28 14:37:17 +09:00
07d162436a search/word_search: split data queries into functions 2026-02-28 14:36:14 +09:00
f5bca61839 flake.lock: bump
Some checks failed
Build database / evals (push) Successful in 10m44s
Run tests / evals (push) Failing after 43m13s
2026-02-25 16:28:18 +09:00
056aaaa0ce tests/search_match_inference: add more cases
Some checks failed
Build database / evals (push) Has been cancelled
Run tests / evals (push) Has been cancelled
2026-02-25 12:42:38 +09:00
27 changed files with 697 additions and 430 deletions

View File

@@ -16,3 +16,26 @@ Note that while the license for the code is MIT, the data has various licenses.
| **Tanos JLPT levels:** | https://www.tanos.co.uk/jlpt/ | | **Tanos JLPT levels:** | https://www.tanos.co.uk/jlpt/ |
| **Kangxi Radicals:** | https://ctext.org/kangxi-zidian | | **Kangxi Radicals:** | https://ctext.org/kangxi-zidian |
## Implementation details
### Word search
The word search procedure is currently split into 3 parts:
1. **Entry ID query**:
Use a complex query with various scoring factors to try to get a list of
database IDs pointing at dictionary entries, sorted by how likely we think each
entry is the word that the caller is looking for. The output here is a `List<int>`.
2. **Data Query**:
Takes the entry ID list from the previous step, and performs all queries needed to retrieve
all the dictionary data for those IDs. The result is a struct with a bunch of flattened lists
with data for all the dictionary entries. These lists are sorted by the order that the ids
were provided.
3. **Regrouping**:
Takes the flattened data, and regroups the items into structs with a more "hierarchical" structure.
All data tagged with the same ID will end up in the same struct. Returns a list of these structs.

12
flake.lock generated
View File

@@ -3,7 +3,7 @@
"jmdict-src": { "jmdict-src": {
"flake": false, "flake": false,
"locked": { "locked": {
"narHash": "sha256-1if5Z1ynrCd05ySrvD6ZA1PfKBayhBFzUOe5vplwYXM=", "narHash": "sha256-lh46uougUzBrRhhwa7cOb32j5Jt9/RjBUhlVjwVzsII=",
"type": "file", "type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz" "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
}, },
@@ -15,7 +15,7 @@
"jmdict-with-examples-src": { "jmdict-with-examples-src": {
"flake": false, "flake": false,
"locked": { "locked": {
"narHash": "sha256-3Eb8iVSZFvuf4yH/53tDdN6Znt+tvvra6kd7GIv4LYE=", "narHash": "sha256-5oS2xDyetbuSM6ax3LUjYA3N60x+D3Hg41HEXGFMqLQ=",
"type": "file", "type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz" "url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
}, },
@@ -27,7 +27,7 @@
"kanjidic2-src": { "kanjidic2-src": {
"flake": false, "flake": false,
"locked": { "locked": {
"narHash": "sha256-mg2cP3rX1wm+dTAQCNHthVcKUH5PZRhGbHv1AP2EwJQ=", "narHash": "sha256-orSeQqSxhn9TtX3anYtbiMEm7nFkuomGnIKoVIUR2CM=",
"type": "file", "type": "file",
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz" "url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
}, },
@@ -38,11 +38,11 @@
}, },
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1771369470, "lastModified": 1771848320,
"narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=", "narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "0182a361324364ae3f436a63005877674cf45efb", "rev": "2fc6539b481e1d2569f25f8799236694180c0993",
"type": "github" "type": "github"
}, },
"original": { "original": {

View File

@@ -10,14 +10,15 @@ List<int?> getPriorityValues(XmlElement e, String prefix) {
final txt = pri.innerText; final txt = pri.innerText;
if (txt.startsWith('news')) { if (txt.startsWith('news')) {
news = int.parse(txt.substring(4)); news = int.parse(txt.substring(4));
} else if (txt.startsWith('ichi')) } else if (txt.startsWith('ichi')) {
ichi = int.parse(txt.substring(4)); ichi = int.parse(txt.substring(4));
else if (txt.startsWith('spec')) } else if (txt.startsWith('spec')) {
spec = int.parse(txt.substring(4)); spec = int.parse(txt.substring(4));
else if (txt.startsWith('gai')) } else if (txt.startsWith('gai')) {
gai = int.parse(txt.substring(3)); gai = int.parse(txt.substring(3));
else if (txt.startsWith('nf')) } else if (txt.startsWith('nf')) {
nf = int.parse(txt.substring(2)); nf = int.parse(txt.substring(2));
}
} }
return [news, ichi, spec, gai, nf]; return [news, ichi, spec, gai, nf];
} }
@@ -79,16 +80,16 @@ List<Entry> parseJMDictData(XmlElement root) {
final List<ReadingElement> readingEls = []; final List<ReadingElement> readingEls = [];
final List<Sense> senses = []; final List<Sense> senses = [];
for (final (kanjiNum, k_ele) in entry.findElements('k_ele').indexed) { for (final (kanjiNum, kEle) in entry.findElements('k_ele').indexed) {
final kePri = getPriorityValues(k_ele, 'ke'); final kePri = getPriorityValues(kEle, 'ke');
kanjiEls.add( kanjiEls.add(
KanjiElement( KanjiElement(
orderNum: kanjiNum + 1, orderNum: kanjiNum + 1,
info: k_ele info: kEle
.findElements('ke_inf') .findElements('ke_inf')
.map((e) => e.innerText.substring(1, e.innerText.length - 1)) .map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(), .toList(),
reading: k_ele.findElements('keb').first.innerText, reading: kEle.findElements('keb').first.innerText,
news: kePri[0], news: kePri[0],
ichi: kePri[1], ichi: kePri[1],
spec: kePri[2], spec: kePri[2],
@@ -98,24 +99,24 @@ List<Entry> parseJMDictData(XmlElement root) {
); );
} }
for (final (orderNum, r_ele) in entry.findElements('r_ele').indexed) { for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) {
final rePri = getPriorityValues(r_ele, 're'); final rePri = getPriorityValues(rEle, 're');
final readingDoesNotMatchKanji = r_ele final readingDoesNotMatchKanji = rEle
.findElements('re_nokanji') .findElements('re_nokanji')
.isNotEmpty; .isNotEmpty;
readingEls.add( readingEls.add(
ReadingElement( ReadingElement(
orderNum: orderNum + 1, orderNum: orderNum + 1,
readingDoesNotMatchKanji: readingDoesNotMatchKanji, readingDoesNotMatchKanji: readingDoesNotMatchKanji,
info: r_ele info: rEle
.findElements('re_inf') .findElements('re_inf')
.map((e) => e.innerText.substring(1, e.innerText.length - 1)) .map((e) => e.innerText.substring(1, e.innerText.length - 1))
.toList(), .toList(),
restrictions: r_ele restrictions: rEle
.findElements('re_restr') .findElements('re_restr')
.map((e) => e.innerText) .map((e) => e.innerText)
.toList(), .toList(),
reading: r_ele.findElements('reb').first.innerText, reading: rEle.findElements('reb').first.innerText,
news: rePri[0], news: rePri[0],
ichi: rePri[1], ichi: rePri[1],
spec: rePri[2], spec: rePri[2],

View File

@@ -19,19 +19,18 @@ Future<Database> openLocalDb({
throw Exception('JADB_PATH does not exist: $jadbPath'); throw Exception('JADB_PATH does not exist: $jadbPath');
} }
final db = final db = await createDatabaseFactoryFfi().openDatabase(
await createDatabaseFactoryFfi().openDatabase( jadbPath,
jadbPath, options: OpenDatabaseOptions(
options: OpenDatabaseOptions( onConfigure: (db) async {
onConfigure: (db) async { if (walMode) {
if (walMode) { await db.execute('PRAGMA journal_mode = WAL');
await db.execute('PRAGMA journal_mode = WAL'); }
} await db.execute('PRAGMA foreign_keys = ON');
await db.execute('PRAGMA foreign_keys = ON'); },
}, readOnly: !readWrite,
readOnly: !readWrite, ),
), );
);
if (verifyTablesExist) { if (verifyTablesExist) {
await db.jadbVerifyTables(); await db.jadbVerifyTables();

View File

@@ -1,4 +1,4 @@
const Map<(String?, String), int?> TANOS_JLPT_OVERRIDES = { const Map<(String?, String), int?> tanosJLPTOverrides = {
// N5: // N5:
(null, 'あなた'): 1223615, (null, 'あなた'): 1223615,
(null, 'あの'): 1000430, (null, 'あの'): 1000430,

View File

@@ -76,7 +76,7 @@ Future<int?> findEntry(
if ((entryIds.isEmpty || entryIds.length > 1) && useOverrides) { if ((entryIds.isEmpty || entryIds.length > 1) && useOverrides) {
print('No entry found, trying to fetch from overrides'); print('No entry found, trying to fetch from overrides');
final overrideEntries = word.readings final overrideEntries = word.readings
.map((reading) => TANOS_JLPT_OVERRIDES[(word.kanji, reading)]) .map((reading) => tanosJLPTOverrides[(word.kanji, reading)])
.whereType<int>() .whereType<int>()
.toSet(); .toSet();
@@ -86,7 +86,7 @@ Future<int?> findEntry(
); );
} else if (overrideEntries.isEmpty && } else if (overrideEntries.isEmpty &&
!word.readings.any( !word.readings.any(
(reading) => TANOS_JLPT_OVERRIDES.containsKey((word.kanji, reading)), (reading) => tanosJLPTOverrides.containsKey((word.kanji, reading)),
)) { )) {
throw Exception( throw Exception(
'No override entry found for ${word.toString()}: $entryIds', 'No override entry found for ${word.toString()}: $entryIds',

View File

@@ -1,6 +1,5 @@
/// Jouyou kanji sorted primarily by grades and secondarily by strokes. /// Jouyou kanji sorted primarily by grades and secondarily by strokes.
const Map<int, Map<int, List<String>>> const Map<int, Map<int, List<String>>> jouyouKanjiByGradeAndStrokeCount = {
JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT = {
1: { 1: {
1: [''], 1: [''],
2: ['', '', '', '', '', '', '', ''], 2: ['', '', '', '', '', '', '', ''],
@@ -1861,8 +1860,8 @@ JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT = {
}, },
}; };
final Map<int, List<String>> JOUYOU_KANJI_BY_GRADES = final Map<int, List<String>> jouyouKanjiByGrades =
JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT.entries jouyouKanjiByGradeAndStrokeCount.entries
.expand((entry) => entry.value.entries) .expand((entry) => entry.value.entries)
.map((entry) => MapEntry(entry.key, entry.value)) .map((entry) => MapEntry(entry.key, entry.value))
.fold<Map<int, List<String>>>( .fold<Map<int, List<String>>>(

View File

@@ -1,4 +1,4 @@
const Map<int, List<String>> RADICALS = { const Map<int, List<String>> radicals = {
1: ['', '', '', '', '', ''], 1: ['', '', '', '', '', ''],
2: [ 2: [
'', '',

View File

@@ -6,6 +6,7 @@ import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart'; import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sources.dart'; import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/search/word_search/word_search.dart'; import 'package:jadb/search/word_search/word_search.dart';
import 'package:jadb/util/romaji_transliteration.dart';
/// A class representing a single dictionary entry from a word search. /// A class representing a single dictionary entry from a word search.
class WordSearchResult { class WordSearchResult {
@@ -44,6 +45,35 @@ class WordSearchResult {
/// the original searchword. /// the original searchword.
List<WordSearchMatchSpan>? matchSpans; List<WordSearchMatchSpan>? matchSpans;
/// Romaji transliterations of every item in [japanese].
///
/// Each item contributes the transliteration of its furigana when it has
/// one, and of its base text otherwise.
List<String> get romaji => [
      for (final ruby in japanese)
        transliterateKanaToLatin(ruby.furigana ?? ruby.base),
    ];
/// All contents of [japanese], with the furigana replaced by a romaji
/// transliteration.
///
/// The furigana of each returned [WordSearchRuby] is the transliteration of
/// the original furigana when present, and otherwise the transliteration of
/// [WordSearchRuby.base]. The base text itself is left untouched.
List<WordSearchRuby> get romajiRubys => japanese
    .map(
      (word) => WordSearchRuby(
        base: word.base,
        // `??` picks the same source text as the former null-check/`!` pair,
        // without needing a null assertion.
        furigana: transliterateKanaToLatin(word.furigana ?? word.base),
      ),
    )
    .toList();
/// The spans from [matchSpans], with positions adjusted for romaji conversion.
///
/// This is mostly useful in conjunction with [romajiRubys].
///
/// Returns `null` when [matchSpans] is `null`. The adjustment itself is not
/// implemented yet, so any non-null [matchSpans] currently results in an
/// [UnimplementedError].
List<WordSearchMatchSpan>? get romajiMatchSpans => matchSpans == null
    ? null
    : throw UnimplementedError('Not yet implemented');
WordSearchResult({ WordSearchResult({
required this.score, required this.score,
required this.entryId, required this.entryId,
@@ -107,7 +137,7 @@ class WordSearchResult {
/// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly. /// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
void inferMatchSpans( void inferMatchSpans(
String searchword, { String searchword, {
SearchMode searchMode = SearchMode.Auto, SearchMode searchMode = SearchMode.auto,
}) { }) {
// TODO: handle wildcards like '?' and '*' when that becomes supported in the search. // TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
// TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at. // TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.
@@ -163,7 +193,7 @@ class WordSearchResult {
this.matchSpans = matchSpans; this.matchSpans = matchSpans;
} }
String _formatJapaneseWord(WordSearchRuby word) => static String _formatJapaneseWord(WordSearchRuby word) =>
word.furigana == null ? word.base : '${word.base} (${word.furigana})'; word.furigana == null ? word.base : '${word.base} (${word.furigana})';
@override @override

View File

@@ -30,7 +30,7 @@ extension JaDBConnection on DatabaseExecutor {
/// Search for a word in the database. /// Search for a word in the database.
Future<List<WordSearchResult>?> jadbSearchWord( Future<List<WordSearchResult>?> jadbSearchWord(
String word, { String word, {
SearchMode searchMode = SearchMode.Auto, SearchMode searchMode = SearchMode.auto,
int page = 0, int page = 0,
int? pageSize, int? pageSize,
}) => searchWordWithDbConnection( }) => searchWordWithDbConnection(
@@ -54,7 +54,7 @@ extension JaDBConnection on DatabaseExecutor {
/// Search for a word in the database, and return the count of results. /// Search for a word in the database, and return the count of results.
Future<int?> jadbSearchWordCount( Future<int?> jadbSearchWordCount(
String word, { String word, {
SearchMode searchMode = SearchMode.Auto, SearchMode searchMode = SearchMode.auto,
}) => searchWordCountWithDbConnection(this, word, searchMode: searchMode); }) => searchWordCountWithDbConnection(this, word, searchMode: searchMode);
/// Given a list of radicals, search which kanji contains all /// Given a list of radicals, search which kanji contains all

View File

@@ -5,140 +5,186 @@ import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/radkfile.dart'; import 'package:jadb/table_names/radkfile.dart';
import 'package:sqflite_common/sqflite.dart'; import 'package:sqflite_common/sqflite.dart';
/// Queries [KANJIDICTableNames.character] for rows whose `literal` column
/// equals [kanji].
Future<List<Map<String, Object?>>> _charactersQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'literal = ?';
  return connection.query(
    KANJIDICTableNames.character,
    where: filter,
    whereArgs: [kanji],
  );
}
/// Queries [KANJIDICTableNames.codepoint] for rows whose `kanji` column
/// equals [kanji].
Future<List<Map<String, Object?>>> _codepointsQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  return connection.query(
    KANJIDICTableNames.codepoint,
    where: filter,
    whereArgs: [kanji],
  );
}
/// Queries [KANJIDICTableNames.kunyomi] for rows whose `kanji` column equals
/// [kanji], ordered by `orderNum`.
Future<List<Map<String, Object?>>> _kunyomisQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  const ordering = 'orderNum';
  return connection.query(
    KANJIDICTableNames.kunyomi,
    where: filter,
    whereArgs: [kanji],
    orderBy: ordering,
  );
}
/// Queries [KANJIDICTableNames.onyomi] for rows whose `kanji` column equals
/// [kanji], ordered by `orderNum`.
Future<List<Map<String, Object?>>> _onyomisQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  const ordering = 'orderNum';
  return connection.query(
    KANJIDICTableNames.onyomi,
    where: filter,
    whereArgs: [kanji],
    orderBy: ordering,
  );
}
/// Queries [KANJIDICTableNames.meaning] for rows whose `kanji` column equals
/// [kanji] and whose `language` column is `'eng'`, ordered by `orderNum`.
Future<List<Map<String, Object?>>> _meaningsQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ? AND language = ?';
  const ordering = 'orderNum';
  return connection.query(
    KANJIDICTableNames.meaning,
    where: filter,
    whereArgs: [kanji, 'eng'],
    orderBy: ordering,
  );
}
/// Queries [KANJIDICTableNames.nanori] for rows whose `kanji` column equals
/// [kanji].
Future<List<Map<String, Object?>>> _nanorisQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  return connection.query(
    KANJIDICTableNames.nanori,
    where: filter,
    whereArgs: [kanji],
  );
}
/// Queries [KANJIDICTableNames.dictionaryReference] for rows whose `kanji`
/// column equals [kanji].
Future<List<Map<String, Object?>>> _dictionaryReferencesQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  return connection.query(
    KANJIDICTableNames.dictionaryReference,
    where: filter,
    whereArgs: [kanji],
  );
}
/// Queries [KANJIDICTableNames.queryCode] for rows whose `kanji` column
/// equals [kanji].
Future<List<Map<String, Object?>>> _queryCodesQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  return connection.query(
    KANJIDICTableNames.queryCode,
    where: filter,
    whereArgs: [kanji],
  );
}
/// Fetches the radicals registered for [kanji] in [KANJIDICTableNames.radical].
///
/// Each returned row has two columns:
/// - `symbol`: the `radicalSymbol` from `XREF__KANJIDIC_Radical__RADKFILE`,
///   joined on `radicalId`.
/// - `names`: the `group_concat` of all names for that `radicalId` in
///   [KANJIDICTableNames.radicalName]. This comes from a `LEFT JOIN`, so it
///   is `NULL` for radicals without any name rows.
///
/// Rows are deduplicated with `SELECT DISTINCT`.
Future<List<Map<String, Object?>>> _radicalsQuery(
DatabaseExecutor connection,
String kanji,
) => connection.rawQuery(
'''
SELECT DISTINCT
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
"names"
FROM "${KANJIDICTableNames.radical}"
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
LEFT JOIN (
SELECT "radicalId", group_concat("name") AS "names"
FROM "${KANJIDICTableNames.radicalName}"
GROUP BY "radicalId"
) USING ("radicalId")
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
''',
[kanji],
);
/// Queries [RADKFILETableNames.radkfile] for rows whose `kanji` column equals
/// [kanji].
Future<List<Map<String, Object?>>> _partsQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  return connection.query(
    RADKFILETableNames.radkfile,
    where: filter,
    whereArgs: [kanji],
  );
}
/// Queries [KANJIDICTableNames.reading] for rows whose `kanji` column equals
/// [kanji].
Future<List<Map<String, Object?>>> _readingsQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  return connection.query(
    KANJIDICTableNames.reading,
    where: filter,
    whereArgs: [kanji],
  );
}
/// Queries [KANJIDICTableNames.strokeMiscount] for rows whose `kanji` column
/// equals [kanji].
Future<List<Map<String, Object?>>> _strokeMiscountsQuery(
  DatabaseExecutor connection,
  String kanji,
) {
  const filter = 'kanji = ?';
  return connection.query(
    KANJIDICTableNames.strokeMiscount,
    where: filter,
    whereArgs: [kanji],
  );
}
// Future<List<Map<String, Object?>>> _variantsQuery(
// DatabaseExecutor connection,
// String kanji,
// ) => connection.query(
// KANJIDICTableNames.variant,
// where: 'kanji = ?',
// whereArgs: [kanji],
// );
/// Searches for a kanji character and returns its details, or null if the kanji is not found in the database. /// Searches for a kanji character and returns its details, or null if the kanji is not found in the database.
Future<KanjiSearchResult?> searchKanjiWithDbConnection( Future<KanjiSearchResult?> searchKanjiWithDbConnection(
DatabaseExecutor connection, DatabaseExecutor connection,
String kanji, String kanji,
) async { ) async {
late final List<Map<String, Object?>> characters; late final List<Map<String, Object?>> characters;
final charactersQuery = connection.query(
KANJIDICTableNames.character,
where: 'literal = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> codepoints; late final List<Map<String, Object?>> codepoints;
final codepointsQuery = connection.query(
KANJIDICTableNames.codepoint,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> kunyomis; late final List<Map<String, Object?>> kunyomis;
final kunyomisQuery = connection.query(
KANJIDICTableNames.kunyomi,
where: 'kanji = ?',
whereArgs: [kanji],
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> onyomis; late final List<Map<String, Object?>> onyomis;
final onyomisQuery = connection.query(
KANJIDICTableNames.onyomi,
where: 'kanji = ?',
whereArgs: [kanji],
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> meanings; late final List<Map<String, Object?>> meanings;
final meaningsQuery = connection.query(
KANJIDICTableNames.meaning,
where: 'kanji = ? AND language = ?',
whereArgs: [kanji, 'eng'],
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> nanoris; late final List<Map<String, Object?>> nanoris;
final nanorisQuery = connection.query(
KANJIDICTableNames.nanori,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> dictionaryReferences; late final List<Map<String, Object?>> dictionaryReferences;
final dictionaryReferencesQuery = connection.query(
KANJIDICTableNames.dictionaryReference,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> queryCodes; late final List<Map<String, Object?>> queryCodes;
final queryCodesQuery = connection.query(
KANJIDICTableNames.queryCode,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> radicals; late final List<Map<String, Object?>> radicals;
final radicalsQuery = connection.rawQuery(
'''
SELECT DISTINCT
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
"names"
FROM "${KANJIDICTableNames.radical}"
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
LEFT JOIN (
SELECT "radicalId", group_concat("name") AS "names"
FROM "${KANJIDICTableNames.radicalName}"
GROUP BY "radicalId"
) USING ("radicalId")
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
''',
[kanji],
);
late final List<Map<String, Object?>> parts; late final List<Map<String, Object?>> parts;
final partsQuery = connection.query(
RADKFILETableNames.radkfile,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> readings; late final List<Map<String, Object?>> readings;
final readingsQuery = connection.query(
KANJIDICTableNames.reading,
where: 'kanji = ?',
whereArgs: [kanji],
);
late final List<Map<String, Object?>> strokeMiscounts; late final List<Map<String, Object?>> strokeMiscounts;
final strokeMiscountsQuery = connection.query(
KANJIDICTableNames.strokeMiscount,
where: 'kanji = ?',
whereArgs: [kanji],
);
// TODO: add variant data to result // TODO: add variant data to result
// late final List<Map<String, Object?>> variants; // late final List<Map<String, Object?>> variants;
// final variants_query = connection.query(
// KANJIDICTableNames.variant,
// where: "kanji = ?",
// whereArgs: [kanji],
// );
// TODO: Search for kunyomi and onyomi usage of the characters // TODO: Search for kunyomi and onyomi usage of the characters
// from JMDict. We'll need to fuzzy aquery JMDict_KanjiElement for mathces, // from JMDict. We'll need to fuzzy aquery JMDict_KanjiElement for matches,
// filter JMdict_ReadingElement for kunyomi/onyomi, and then sort the main entry // filter JMdict_ReadingElement for kunyomi/onyomi, and then sort the main entry
// by JLPT, news frequency, etc. // by JLPT, news frequency, etc.
await charactersQuery.then((value) => characters = value); await _charactersQuery(connection, kanji).then((value) => characters = value);
if (characters.isEmpty) { if (characters.isEmpty) {
return null; return null;
} }
await Future.wait({ await Future.wait({
codepointsQuery.then((value) => codepoints = value), _codepointsQuery(connection, kanji).then((value) => codepoints = value),
kunyomisQuery.then((value) => kunyomis = value), _kunyomisQuery(connection, kanji).then((value) => kunyomis = value),
onyomisQuery.then((value) => onyomis = value), _onyomisQuery(connection, kanji).then((value) => onyomis = value),
meaningsQuery.then((value) => meanings = value), _meaningsQuery(connection, kanji).then((value) => meanings = value),
nanorisQuery.then((value) => nanoris = value), _nanorisQuery(connection, kanji).then((value) => nanoris = value),
dictionaryReferencesQuery.then((value) => dictionaryReferences = value), _dictionaryReferencesQuery(
queryCodesQuery.then((value) => queryCodes = value), connection,
radicalsQuery.then((value) => radicals = value), kanji,
partsQuery.then((value) => parts = value), ).then((value) => dictionaryReferences = value),
readingsQuery.then((value) => readings = value), _queryCodesQuery(connection, kanji).then((value) => queryCodes = value),
strokeMiscountsQuery.then((value) => strokeMiscounts = value), _radicalsQuery(connection, kanji).then((value) => radicals = value),
_partsQuery(connection, kanji).then((value) => parts = value),
_readingsQuery(connection, kanji).then((value) => readings = value),
_strokeMiscountsQuery(
connection,
kanji,
).then((value) => strokeMiscounts = value),
// variants_query.then((value) => variants = value), // variants_query.then((value) => variants = value),
}); });

View File

@@ -53,274 +53,363 @@ class LinearWordQueryData {
}); });
} }
/// Queries [JMdictTableNames.sense] for all rows whose `entryId` is one of
/// [entryIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _sensesQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.sense,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
/// Queries [JMdictTableNames.readingElement] for all rows whose `entryId` is
/// one of [entryIds], ordered by `orderNum`.
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _readingelementsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingElement,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
}
/// Queries [JMdictTableNames.kanjiElement] for all rows whose `entryId` is
/// one of [entryIds], ordered by `orderNum`.
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _kanjielementsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.kanjiElement,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
}
/// Queries [TanosJLPTTableNames.jlptTag] for all rows whose `entryId` is one
/// of [entryIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _jlpttagsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    TanosJLPTTableNames.jlptTag,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
/// Queries `JMdict_EntryCommon` for all rows whose `entryId` is one of
/// [entryIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _commonentriesQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    'JMdict_EntryCommon',
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
// Sense queries
/// Fetches the antonym cross-references of the senses in [senseIds].
///
/// Rows come from [JMdictTableNames.senseAntonyms], joined with
/// `JMdict_BaseAndFurigana` on `xrefEntryId = entryId` and restricted to
/// rows where `isFirst` is truthy. Each row carries `senseId`, `ambiguous`,
/// `xrefEntryId`, `base` and `furigana`.
///
/// Results are ordered by `senseId`, then `xrefEntryId`. One `?` placeholder
/// is generated per sense id.
Future<List<Map<String, Object?>>> _senseantonymsQuery(
DatabaseExecutor connection,
List<int> senseIds,
) => connection.rawQuery(
"""
SELECT
"${JMdictTableNames.senseAntonyms}".senseId,
"${JMdictTableNames.senseAntonyms}".ambiguous,
"${JMdictTableNames.senseAntonyms}".xrefEntryId,
"JMdict_BaseAndFurigana"."base",
"JMdict_BaseAndFurigana"."furigana"
FROM "${JMdictTableNames.senseAntonyms}"
JOIN "JMdict_BaseAndFurigana"
ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
WHERE
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
AND "JMdict_BaseAndFurigana"."isFirst"
ORDER BY
"${JMdictTableNames.senseAntonyms}"."senseId",
"${JMdictTableNames.senseAntonyms}"."xrefEntryId"
""",
[...senseIds],
);
/// Fetches the "see also" cross-references of the senses in [senseIds].
///
/// Rows come from [JMdictTableNames.senseSeeAlso], joined with
/// `JMdict_BaseAndFurigana` on `xrefEntryId = entryId` and restricted to
/// rows where `isFirst` is truthy. Each row carries `senseId`, `ambiguous`,
/// `xrefEntryId`, `base` and `furigana`.
///
/// Results are ordered by `senseId`, then `xrefEntryId`. One `?` placeholder
/// is generated per sense id.
// NOTE(review): unlike the sibling query helpers in this file, this function
// has no leading underscore and is therefore public. Confirm whether that is
// intentional before renaming, since external callers may depend on it.
Future<List<Map<String, Object?>>> senseseealsosQuery(
DatabaseExecutor connection,
List<int> senseIds,
) => connection.rawQuery(
"""
SELECT
"${JMdictTableNames.senseSeeAlso}"."senseId",
"${JMdictTableNames.senseSeeAlso}"."ambiguous",
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
"JMdict_BaseAndFurigana"."base",
"JMdict_BaseAndFurigana"."furigana"
FROM "${JMdictTableNames.senseSeeAlso}"
JOIN "JMdict_BaseAndFurigana"
ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
WHERE
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
AND "JMdict_BaseAndFurigana"."isFirst"
ORDER BY
"${JMdictTableNames.senseSeeAlso}"."senseId",
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
""",
[...senseIds],
);
/// Queries [JMdictTableNames.senseDialect] for all rows whose `senseId` is
/// one of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _sensedialectsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseDialect,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.senseField] for all rows whose `senseId` is one
/// of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _sensefieldsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseField,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.senseGlossary] for all rows whose `senseId` is
/// one of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _senseglossariesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseGlossary,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.senseInfo] for all rows whose `senseId` is one
/// of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _senseinfosQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseInfo,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.senseLanguageSource] for all rows whose
/// `senseId` is one of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _senselanguagesourcesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseLanguageSource,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.senseMisc] for all rows whose `senseId` is one
/// of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _sensemiscsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseMisc,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.sensePOS] for all rows whose `senseId` is one of
/// [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _sensepossQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.sensePOS,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.senseRestrictedToKanji] for all rows whose
/// `senseId` is one of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _senserestrictedtokanjisQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseRestrictedToKanji,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries [JMdictTableNames.senseRestrictedToReading] for all rows whose
/// `senseId` is one of [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _senserestrictedtoreadingsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseRestrictedToReading,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
/// Queries `JMdict_ExampleSentence` for all rows whose `senseId` is one of
/// [senseIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _examplesentencesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    'JMdict_ExampleSentence',
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
// Reading/kanji elements queries
/// Queries [JMdictTableNames.readingInfo] for all rows whose `elementId` is
/// one of [readingIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _readingelementinfosQuery(
  DatabaseExecutor connection,
  List<int> readingIds,
) {
  final placeholders = List.filled(readingIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingInfo,
    where: '(elementId) IN ($placeholders)',
    whereArgs: readingIds,
  );
}
/// Queries [JMdictTableNames.readingRestriction] for all rows whose
/// `elementId` is one of [readingIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _readingelementrestrictionsQuery(
  DatabaseExecutor connection,
  List<int> readingIds,
) {
  final placeholders = List.filled(readingIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingRestriction,
    where: '(elementId) IN ($placeholders)',
    whereArgs: readingIds,
  );
}
/// Queries [JMdictTableNames.kanjiInfo] for all rows whose `elementId` is one
/// of [kanjiIds].
///
/// One `?` placeholder is generated per id, and the ids are bound as
/// arguments.
Future<List<Map<String, Object?>>> _kanjielementinfosQuery(
  DatabaseExecutor connection,
  List<int> kanjiIds,
) {
  final placeholders = List.filled(kanjiIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.kanjiInfo,
    where: '(elementId) IN ($placeholders)',
    whereArgs: kanjiIds,
  );
}
// Xref queries
/// Runs [fetchLinearWordQueryData] for [entryIds] with `fetchXrefData`
/// disabled.
///
/// Presumably used to load the entries that antonym cross-references point
/// at; confirm against the caller.
Future<LinearWordQueryData?> _senseantonymdataQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  return fetchLinearWordQueryData(connection, entryIds, fetchXrefData: false);
}
/// Runs [fetchLinearWordQueryData] for [entryIds] with `fetchXrefData`
/// disabled.
///
/// Presumably used to load the entries that "see also" cross-references
/// point at; confirm against the caller.
Future<LinearWordQueryData?> _senseseealsodataQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  return fetchLinearWordQueryData(connection, entryIds, fetchXrefData: false);
}
// Full query
Future<LinearWordQueryData> fetchLinearWordQueryData( Future<LinearWordQueryData> fetchLinearWordQueryData(
DatabaseExecutor connection, DatabaseExecutor connection,
List<int> entryIds, { List<int> entryIds, {
bool fetchXrefData = true, bool fetchXrefData = true,
}) async { }) async {
late final List<Map<String, Object?>> senses; late final List<Map<String, Object?>> senses;
final Future<List<Map<String, Object?>>> sensesQuery = connection.query(
JMdictTableNames.sense,
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
whereArgs: entryIds,
);
late final List<Map<String, Object?>> readingElements; late final List<Map<String, Object?>> readingElements;
final Future<List<Map<String, Object?>>> readingelementsQuery = connection
.query(
JMdictTableNames.readingElement,
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
whereArgs: entryIds,
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> kanjiElements; late final List<Map<String, Object?>> kanjiElements;
final Future<List<Map<String, Object?>>> kanjielementsQuery = connection
.query(
JMdictTableNames.kanjiElement,
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
whereArgs: entryIds,
orderBy: 'orderNum',
);
late final List<Map<String, Object?>> jlptTags; late final List<Map<String, Object?>> jlptTags;
final Future<List<Map<String, Object?>>> jlpttagsQuery = connection.query(
TanosJLPTTableNames.jlptTag,
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
whereArgs: entryIds,
);
late final List<Map<String, Object?>> commonEntries; late final List<Map<String, Object?>> commonEntries;
final Future<List<Map<String, Object?>>> commonentriesQuery = connection
.query(
'JMdict_EntryCommon',
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
whereArgs: entryIds,
);
await Future.wait([ await Future.wait([
sensesQuery.then((value) => senses = value), _sensesQuery(connection, entryIds).then((value) => senses = value),
readingelementsQuery.then((value) => readingElements = value), _readingelementsQuery(
kanjielementsQuery.then((value) => kanjiElements = value), connection,
jlpttagsQuery.then((value) => jlptTags = value), entryIds,
commonentriesQuery.then((value) => commonEntries = value), ).then((value) => readingElements = value),
_kanjielementsQuery(
connection,
entryIds,
).then((value) => kanjiElements = value),
_jlpttagsQuery(connection, entryIds).then((value) => jlptTags = value),
_commonentriesQuery(
connection,
entryIds,
).then((value) => commonEntries = value),
]); ]);
// Sense queries
final senseIds = senses.map((sense) => sense['senseId'] as int).toList(); final senseIds = senses.map((sense) => sense['senseId'] as int).toList();
late final List<Map<String, Object?>> senseAntonyms; late final List<Map<String, Object?>> senseAntonyms;
final Future<List<Map<String, Object?>>> senseantonymsQuery = connection
.rawQuery(
"""
SELECT
"${JMdictTableNames.senseAntonyms}".senseId,
"${JMdictTableNames.senseAntonyms}".ambiguous,
"${JMdictTableNames.senseAntonyms}".xrefEntryId,
"JMdict_BaseAndFurigana"."base",
"JMdict_BaseAndFurigana"."furigana"
FROM "${JMdictTableNames.senseAntonyms}"
JOIN "JMdict_BaseAndFurigana"
ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
WHERE
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
AND "JMdict_BaseAndFurigana"."isFirst"
ORDER BY
"${JMdictTableNames.senseAntonyms}"."senseId",
"${JMdictTableNames.senseAntonyms}"."xrefEntryId"
""",
[...senseIds],
);
late final List<Map<String, Object?>> senseDialects; late final List<Map<String, Object?>> senseDialects;
final Future<List<Map<String, Object?>>> sensedialectsQuery = connection
.query(
JMdictTableNames.senseDialect,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseFields; late final List<Map<String, Object?>> senseFields;
final Future<List<Map<String, Object?>>> sensefieldsQuery = connection.query(
JMdictTableNames.senseField,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseGlossaries; late final List<Map<String, Object?>> senseGlossaries;
final Future<List<Map<String, Object?>>> senseglossariesQuery = connection
.query(
JMdictTableNames.senseGlossary,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseInfos; late final List<Map<String, Object?>> senseInfos;
final Future<List<Map<String, Object?>>> senseinfosQuery = connection.query(
JMdictTableNames.senseInfo,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseLanguageSources; late final List<Map<String, Object?>> senseLanguageSources;
final Future<List<Map<String, Object?>>> senselanguagesourcesQuery =
connection.query(
JMdictTableNames.senseLanguageSource,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseMiscs; late final List<Map<String, Object?>> senseMiscs;
final Future<List<Map<String, Object?>>> sensemiscsQuery = connection.query(
JMdictTableNames.senseMisc,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> sensePOSs; late final List<Map<String, Object?>> sensePOSs;
final Future<List<Map<String, Object?>>> sensepossQuery = connection.query(
JMdictTableNames.sensePOS,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseRestrictedToKanjis; late final List<Map<String, Object?>> senseRestrictedToKanjis;
final Future<List<Map<String, Object?>>> senserestrictedtokanjisQuery =
connection.query(
JMdictTableNames.senseRestrictedToKanji,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseRestrictedToReadings; late final List<Map<String, Object?>> senseRestrictedToReadings;
final Future<List<Map<String, Object?>>> senserestrictedtoreadingsQuery =
connection.query(
JMdictTableNames.senseRestrictedToReading,
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
late final List<Map<String, Object?>> senseSeeAlsos; late final List<Map<String, Object?>> senseSeeAlsos;
final Future<List<Map<String, Object?>>> senseseealsosQuery = connection
.rawQuery(
"""
SELECT
"${JMdictTableNames.senseSeeAlso}"."senseId",
"${JMdictTableNames.senseSeeAlso}"."ambiguous",
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
"JMdict_BaseAndFurigana"."base",
"JMdict_BaseAndFurigana"."furigana"
FROM "${JMdictTableNames.senseSeeAlso}"
JOIN "JMdict_BaseAndFurigana"
ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
WHERE
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
AND "JMdict_BaseAndFurigana"."isFirst"
ORDER BY
"${JMdictTableNames.senseSeeAlso}"."senseId",
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
""",
[...senseIds],
);
late final List<Map<String, Object?>> exampleSentences; late final List<Map<String, Object?>> exampleSentences;
final Future<List<Map<String, Object?>>> examplesentencesQuery = connection
.query(
'JMdict_ExampleSentence',
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
whereArgs: senseIds,
);
// Reading queries
final readingIds = readingElements final readingIds = readingElements
.map((element) => element['elementId'] as int) .map((element) => element['elementId'] as int)
.toList(); .toList();
late final List<Map<String, Object?>> readingElementInfos;
final Future<List<Map<String, Object?>>> readingelementinfosQuery =
connection.query(
JMdictTableNames.readingInfo,
where:
'(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
whereArgs: readingIds,
);
late final List<Map<String, Object?>> readingElementRestrictions;
final Future<List<Map<String, Object?>>> readingelementrestrictionsQuery =
connection.query(
JMdictTableNames.readingRestriction,
where:
'(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
whereArgs: readingIds,
);
// Kanji queries
final kanjiIds = kanjiElements final kanjiIds = kanjiElements
.map((element) => element['elementId'] as int) .map((element) => element['elementId'] as int)
.toList(); .toList();
late final List<Map<String, Object?>> readingElementInfos;
late final List<Map<String, Object?>> readingElementRestrictions;
late final List<Map<String, Object?>> kanjiElementInfos; late final List<Map<String, Object?>> kanjiElementInfos;
final Future<List<Map<String, Object?>>> kanjielementinfosQuery = connection
.query(
JMdictTableNames.kanjiInfo,
where:
'(elementId) IN (${List.filled(kanjiIds.length, '?').join(',')})',
whereArgs: kanjiIds,
);
// Xref data queries // Xref data queries
await Future.wait([ await Future.wait([
senseantonymsQuery.then((value) => senseAntonyms = value), _senseantonymsQuery(
senseseealsosQuery.then((value) => senseSeeAlsos = value), connection,
senseIds,
).then((value) => senseAntonyms = value),
senseseealsosQuery(
connection,
senseIds,
).then((value) => senseSeeAlsos = value),
]); ]);
late final LinearWordQueryData? senseAntonymData; LinearWordQueryData? senseAntonymData;
final Future<LinearWordQueryData?> senseantonymdataQuery = LinearWordQueryData? senseSeeAlsoData;
fetchXrefData
? fetchLinearWordQueryData(
connection,
senseAntonyms
.map((antonym) => antonym['xrefEntryId'] as int)
.toList(),
fetchXrefData: false,
)
: Future.value(null);
late final LinearWordQueryData? senseSeeAlsoData;
final Future<LinearWordQueryData?> senseseealsodataQuery =
fetchXrefData
? fetchLinearWordQueryData(
connection,
senseSeeAlsos.map((seeAlso) => seeAlso['xrefEntryId'] as int).toList(),
fetchXrefData: false,
)
: Future.value(null);
await Future.wait([ await Future.wait([
sensedialectsQuery.then((value) => senseDialects = value), _sensedialectsQuery(
sensefieldsQuery.then((value) => senseFields = value), connection,
senseglossariesQuery.then((value) => senseGlossaries = value), senseIds,
senseinfosQuery.then((value) => senseInfos = value), ).then((value) => senseDialects = value),
senselanguagesourcesQuery.then((value) => senseLanguageSources = value), _sensefieldsQuery(
sensemiscsQuery.then((value) => senseMiscs = value), connection,
sensepossQuery.then((value) => sensePOSs = value), senseIds,
senserestrictedtokanjisQuery.then( ).then((value) => senseFields = value),
(value) => senseRestrictedToKanjis = value, _senseglossariesQuery(
), connection,
senserestrictedtoreadingsQuery.then( senseIds,
(value) => senseRestrictedToReadings = value, ).then((value) => senseGlossaries = value),
), _senseinfosQuery(connection, senseIds).then((value) => senseInfos = value),
examplesentencesQuery.then((value) => exampleSentences = value), _senselanguagesourcesQuery(
readingelementinfosQuery.then((value) => readingElementInfos = value), connection,
readingelementrestrictionsQuery.then( senseIds,
(value) => readingElementRestrictions = value, ).then((value) => senseLanguageSources = value),
), _sensemiscsQuery(connection, senseIds).then((value) => senseMiscs = value),
kanjielementinfosQuery.then((value) => kanjiElementInfos = value), _sensepossQuery(connection, senseIds).then((value) => sensePOSs = value),
senseantonymdataQuery.then((value) => senseAntonymData = value), _senserestrictedtokanjisQuery(
senseseealsodataQuery.then((value) => senseSeeAlsoData = value), connection,
senseIds,
).then((value) => senseRestrictedToKanjis = value),
_senserestrictedtoreadingsQuery(
connection,
senseIds,
).then((value) => senseRestrictedToReadings = value),
_examplesentencesQuery(
connection,
senseIds,
).then((value) => exampleSentences = value),
_readingelementinfosQuery(
connection,
readingIds,
).then((value) => readingElementInfos = value),
_readingelementrestrictionsQuery(
connection,
readingIds,
).then((value) => readingElementRestrictions = value),
_kanjielementinfosQuery(
connection,
kanjiIds,
).then((value) => kanjiElementInfos = value),
if (fetchXrefData)
_senseantonymdataQuery(
connection,
senseAntonyms.map((antonym) => antonym['xrefEntryId'] as int).toList(),
).then((value) => senseAntonymData = value),
if (fetchXrefData)
_senseseealsodataQuery(
connection,
senseSeeAlsos.map((seeAlso) => seeAlso['xrefEntryId'] as int).toList(),
).then((value) => senseSeeAlsoData = value),
]); ]);
return LinearWordQueryData( return LinearWordQueryData(

View File

@@ -15,15 +15,15 @@ SearchMode _determineSearchMode(String word) {
final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word); final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word);
if (containsKanji && containsAscii) { if (containsKanji && containsAscii) {
return SearchMode.MixedKanji; return SearchMode.mixedKanji;
} else if (containsKanji) { } else if (containsKanji) {
return SearchMode.Kanji; return SearchMode.kanji;
} else if (containsAscii) { } else if (containsAscii) {
return SearchMode.English; return SearchMode.english;
} else if (word.contains(hiraganaRegex) || word.contains(katakanaRegex)) { } else if (word.contains(hiraganaRegex) || word.contains(katakanaRegex)) {
return SearchMode.Kana; return SearchMode.kana;
} else { } else {
return SearchMode.MixedKana; return SearchMode.mixedKana;
} }
} }
@@ -246,7 +246,7 @@ Future<List<ScoredEntryId>> fetchEntryIds(
int? pageSize, int? pageSize,
int? offset, int? offset,
) async { ) async {
if (searchMode == SearchMode.Auto) { if (searchMode == SearchMode.auto) {
searchMode = _determineSearchMode(word); searchMode = _determineSearchMode(word);
} }
@@ -254,20 +254,20 @@ Future<List<ScoredEntryId>> fetchEntryIds(
late final List<ScoredEntryId> entryIds; late final List<ScoredEntryId> entryIds;
switch (searchMode) { switch (searchMode) {
case SearchMode.Kanji: case SearchMode.kanji:
entryIds = await _queryKanji(connection, word, pageSize, offset); entryIds = await _queryKanji(connection, word, pageSize, offset);
break; break;
case SearchMode.Kana: case SearchMode.kana:
entryIds = await _queryKana(connection, word, pageSize, offset); entryIds = await _queryKana(connection, word, pageSize, offset);
break; break;
case SearchMode.English: case SearchMode.english:
entryIds = await _queryEnglish(connection, word, pageSize, offset); entryIds = await _queryEnglish(connection, word, pageSize, offset);
break; break;
case SearchMode.MixedKana: case SearchMode.mixedKana:
case SearchMode.MixedKanji: case SearchMode.mixedKanji:
default: default:
throw UnimplementedError('Search mode $searchMode is not implemented'); throw UnimplementedError('Search mode $searchMode is not implemented');
} }
@@ -280,7 +280,7 @@ Future<int?> fetchEntryIdCount(
String word, String word,
SearchMode searchMode, SearchMode searchMode,
) async { ) async {
if (searchMode == SearchMode.Auto) { if (searchMode == SearchMode.auto) {
searchMode = _determineSearchMode(word); searchMode = _determineSearchMode(word);
} }
@@ -289,20 +289,20 @@ Future<int?> fetchEntryIdCount(
late final int? entryIdCount; late final int? entryIdCount;
switch (searchMode) { switch (searchMode) {
case SearchMode.Kanji: case SearchMode.kanji:
entryIdCount = await _queryKanjiCount(connection, word); entryIdCount = await _queryKanjiCount(connection, word);
break; break;
case SearchMode.Kana: case SearchMode.kana:
entryIdCount = await _queryKanaCount(connection, word); entryIdCount = await _queryKanaCount(connection, word);
break; break;
case SearchMode.English: case SearchMode.english:
entryIdCount = await _queryEnglishCount(connection, word); entryIdCount = await _queryEnglishCount(connection, word);
break; break;
case SearchMode.MixedKana: case SearchMode.mixedKana:
case SearchMode.MixedKanji: case SearchMode.mixedKanji:
default: default:
throw UnimplementedError('Search mode $searchMode is not implemented'); throw UnimplementedError('Search mode $searchMode is not implemented');
} }

View File

@@ -54,7 +54,7 @@ List<WordSearchResult> regroupWordSearchResults({
.where((element) => element['entryId'] == scoredEntryId.entryId) .where((element) => element['entryId'] == scoredEntryId.entryId)
.toList(); .toList();
final GroupedWordResult entryReadingElementsGrouped = _regroup_words( final GroupedWordResult entryReadingElementsGrouped = _regroupWords(
entryId: scoredEntryId.entryId, entryId: scoredEntryId.entryId,
readingElements: entryReadingElements, readingElements: entryReadingElements,
kanjiElements: entryKanjiElements, kanjiElements: entryKanjiElements,
@@ -64,7 +64,7 @@ List<WordSearchResult> regroupWordSearchResults({
kanjiElementInfos: linearWordQueryData.kanjiElementInfos, kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
); );
final List<WordSearchSense> entrySensesGrouped = _regroup_senses( final List<WordSearchSense> entrySensesGrouped = _regroupSenses(
senses: entrySenses, senses: entrySenses,
senseAntonyms: linearWordQueryData.senseAntonyms, senseAntonyms: linearWordQueryData.senseAntonyms,
senseDialects: linearWordQueryData.senseDialects, senseDialects: linearWordQueryData.senseDialects,
@@ -112,7 +112,7 @@ class GroupedWordResult {
}); });
} }
GroupedWordResult _regroup_words({ GroupedWordResult _regroupWords({
required int entryId, required int entryId,
required List<Map<String, Object?>> kanjiElements, required List<Map<String, Object?>> kanjiElements,
required List<Map<String, Object?>> kanjiElementInfos, required List<Map<String, Object?>> kanjiElementInfos,
@@ -195,7 +195,7 @@ GroupedWordResult _regroup_words({
); );
} }
List<WordSearchSense> _regroup_senses({ List<WordSearchSense> _regroupSenses({
required List<Map<String, Object?>> senses, required List<Map<String, Object?>> senses,
required List<Map<String, Object?>> senseAntonyms, required List<Map<String, Object?>> senseAntonyms,
required List<Map<String, Object?>> senseDialects, required List<Map<String, Object?>> senseDialects,

View File

@@ -13,13 +13,31 @@ import 'package:jadb/search/word_search/regrouping.dart';
import 'package:jadb/table_names/jmdict.dart'; import 'package:jadb/table_names/jmdict.dart';
import 'package:sqflite_common/sqlite_api.dart'; import 'package:sqflite_common/sqlite_api.dart';
enum SearchMode { Auto, English, Kanji, MixedKanji, Kana, MixedKana } enum SearchMode {
/// Try to autodetect what is being searched for
auto,
/// Search for english words
english,
/// Search for the kanji reading of a word
kanji,
/// Search for the kanji reading of a word, mixed in with kana/romaji
mixedKanji,
/// Search for the kana reading of a word
kana,
/// Search for the kana reading of a word, mixed in with romaji
mixedKana,
}
/// Searches for an input string, returning a list of results with their details. Returns null if the input string is empty. /// Searches for an input string, returning a list of results with their details. Returns null if the input string is empty.
Future<List<WordSearchResult>?> searchWordWithDbConnection( Future<List<WordSearchResult>?> searchWordWithDbConnection(
DatabaseExecutor connection, DatabaseExecutor connection,
String word, { String word, {
SearchMode searchMode = SearchMode.Auto, SearchMode searchMode = SearchMode.auto,
int page = 0, int page = 0,
int? pageSize, int? pageSize,
}) async { }) async {
@@ -63,7 +81,7 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
Future<int?> searchWordCountWithDbConnection( Future<int?> searchWordCountWithDbConnection(
DatabaseExecutor connection, DatabaseExecutor connection,
String word, { String word, {
SearchMode searchMode = SearchMode.Auto, SearchMode searchMode = SearchMode.auto,
}) async { }) async {
if (word.isEmpty) { if (word.isEmpty) {
return null; return null;

View File

@@ -1,7 +1,7 @@
import 'package:jadb/util/lemmatizer/lemmatizer.dart'; import 'package:jadb/util/lemmatizer/lemmatizer.dart';
import 'package:jadb/util/lemmatizer/rules/godan-verbs.dart'; import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
import 'package:jadb/util/lemmatizer/rules/i-adjectives.dart'; import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart';
import 'package:jadb/util/lemmatizer/rules/ichidan-verbs.dart'; import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
List<LemmatizationRule> lemmatizationRules = [ List<LemmatizationRule> lemmatizationRules = [
...ichidanVerbLemmatizationRules, ...ichidanVerbLemmatizationRules,

View File

@@ -1,9 +1,9 @@
// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb // Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb
const hiragana_syllabic_n = ''; const hiraganaSyllabicN = '';
const hiragana_small_tsu = ''; const hiraganaSmallTsu = '';
const Map<String, String> hiragana_to_latin = { const Map<String, String> hiraganaToLatin = {
'': 'a', '': 'a',
'': 'i', '': 'i',
'': 'u', '': 'u',
@@ -209,7 +209,7 @@ const Map<String, String> hiragana_to_latin = {
'': 'yori', '': 'yori',
}; };
const Map<String, String> latin_to_hiragana = { const Map<String, String> latinToHiragana = {
'a': '', 'a': '',
'i': '', 'i': '',
'u': '', 'u': '',
@@ -481,9 +481,9 @@ const Map<String, String> latin_to_hiragana = {
'#~': '', '#~': '',
}; };
bool _smallTsu(String forConversion) => forConversion == hiragana_small_tsu; bool _smallTsu(String forConversion) => forConversion == hiraganaSmallTsu;
bool _nFollowedByYuYeYo(String forConversion, String kana) => bool _nFollowedByYuYeYo(String forConversion, String kana) =>
forConversion == hiragana_syllabic_n && forConversion == hiraganaSyllabicN &&
kana.length > 1 && kana.length > 1 &&
'やゆよ'.contains(kana.substring(1, 2)); 'やゆよ'.contains(kana.substring(1, 2));
@@ -505,7 +505,7 @@ String transliterateHiraganaToLatin(String hiragana) {
} else if (_nFollowedByYuYeYo(forConversion, kana)) { } else if (_nFollowedByYuYeYo(forConversion, kana)) {
mora = "n'"; mora = "n'";
} }
mora ??= hiragana_to_latin[forConversion]; mora ??= hiraganaToLatin[forConversion];
if (mora != null) { if (mora != null) {
if (geminate) { if (geminate) {
@@ -527,7 +527,7 @@ String transliterateHiraganaToLatin(String hiragana) {
bool _doubleNFollowedByAIUEO(String forConversion) => bool _doubleNFollowedByAIUEO(String forConversion) =>
RegExp(r'^nn[aiueo]$').hasMatch(forConversion); RegExp(r'^nn[aiueo]$').hasMatch(forConversion);
bool _hasTableMatch(String forConversion) => bool _hasTableMatch(String forConversion) =>
latin_to_hiragana[forConversion] != null; latinToHiragana[forConversion] != null;
bool _hasDoubleConsonant(String forConversion, int length) => bool _hasDoubleConsonant(String forConversion, int length) =>
forConversion == 'tch' || forConversion == 'tch' ||
(length == 2 && (length == 2 &&
@@ -549,12 +549,12 @@ String transliterateLatinToHiragana(String latin) {
final String forConversion = romaji.substring(0, length); final String forConversion = romaji.substring(0, length);
if (_doubleNFollowedByAIUEO(forConversion)) { if (_doubleNFollowedByAIUEO(forConversion)) {
mora = hiragana_syllabic_n; mora = hiraganaSyllabicN;
forRemoval = 1; forRemoval = 1;
} else if (_hasTableMatch(forConversion)) { } else if (_hasTableMatch(forConversion)) {
mora = latin_to_hiragana[forConversion]; mora = latinToHiragana[forConversion];
} else if (_hasDoubleConsonant(forConversion, length)) { } else if (_hasDoubleConsonant(forConversion, length)) {
mora = hiragana_small_tsu; mora = hiraganaSmallTsu;
forRemoval = 1; forRemoval = 1;
} }

View File

@@ -4,6 +4,6 @@ import 'package:test/test.dart';
void main() { void main() {
test('Assert 2136 kanji in jouyou set', () { test('Assert 2136 kanji in jouyou set', () {
expect(JOUYOU_KANJI_BY_GRADES.values.flattenedToSet.length, 2136); expect(jouyouKanjiByGrades.values.flattenedToSet.length, 2136);
}); });
} }

View File

@@ -1,21 +1,20 @@
import 'dart:ffi';
import 'dart:io';
import 'package:jadb/models/create_empty_db.dart'; import 'package:jadb/models/create_empty_db.dart';
import 'package:jadb/search.dart'; import 'package:jadb/search.dart';
import 'package:sqflite_common_ffi/sqflite_ffi.dart'; import 'package:sqflite_common_ffi/sqflite_ffi.dart';
// import 'package:sqlite3/open.dart'; // import 'package:sqlite3/open.dart';
import 'package:test/test.dart'; import 'package:test/test.dart';
Future<DatabaseExecutor> setup_inmemory_database() async { Future<DatabaseExecutor> setupInMemoryDatabase() async {
final dbConnection = await createDatabaseFactoryFfi().openDatabase(':memory:'); final dbConnection = await createDatabaseFactoryFfi().openDatabase(
':memory:',
);
return dbConnection; return dbConnection;
} }
void main() { void main() {
test('Create empty db', () async { test('Create empty db', () async {
final db = await setup_inmemory_database(); final db = await setupInMemoryDatabase();
await createEmptyDb(db); await createEmptyDb(db);

View File

@@ -5,7 +5,7 @@ import 'setup_database_connection.dart';
void main() { void main() {
test('Filter kanji', () async { test('Filter kanji', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.filterKanji([ final result = await connection.filterKanji([
'a', 'a',

View File

@@ -6,16 +6,16 @@ import 'setup_database_connection.dart';
void main() { void main() {
test('Search a kanji', () async { test('Search a kanji', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchKanji(''); final result = await connection.jadbSearchKanji('');
expect(result, isNotNull); expect(result, isNotNull);
}); });
group('Search all jouyou kanji', () { group('Search all jouyou kanji', () {
JOUYOU_KANJI_BY_GRADES.forEach((grade, characters) { jouyouKanjiByGrades.forEach((grade, characters) {
test('Search all kanji in grade $grade', () async { test('Search all kanji in grade $grade', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
for (final character in characters) { for (final character in characters) {
final result = await connection.jadbSearchKanji(character); final result = await connection.jadbSearchKanji(character);

View File

@@ -191,4 +191,67 @@ void main() {
), ),
]); ]);
}); });
test('Infer match with no matches', () {
final wordSearchResult = WordSearchResult(
entryId: 0,
score: 0,
isCommon: false,
jlptLevel: JlptLevel.none,
kanjiInfo: {},
readingInfo: {},
japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
senses: [
WordSearchSense(
antonyms: [],
dialects: [],
englishDefinitions: ['kana'],
fields: [],
info: [],
languageSource: [],
misc: [],
partsOfSpeech: [],
restrictedToKanji: [],
restrictedToReading: [],
seeAlso: [],
),
],
sources: WordSearchSources.empty(),
);
wordSearchResult.inferMatchSpans('xyz');
expect(wordSearchResult.matchSpans, isEmpty);
});
test('Infer multiple matches of same substring', () {
final wordSearchResult = WordSearchResult(
entryId: 0,
score: 0,
isCommon: false,
jlptLevel: JlptLevel.none,
kanjiInfo: {},
readingInfo: {},
japanese: [WordSearchRuby(base: 'ああ')],
senses: [],
sources: WordSearchSources.empty(),
);
wordSearchResult.inferMatchSpans('');
expect(wordSearchResult.matchSpans, [
WordSearchMatchSpan(
spanType: WordSearchMatchSpanType.kanji,
start: 0,
end: 1,
index: 0,
),
WordSearchMatchSpan(
spanType: WordSearchMatchSpanType.kanji,
start: 1,
end: 2,
index: 0,
),
]);
});
} }

View File

@@ -3,7 +3,7 @@ import 'dart:io';
import 'package:jadb/_data_ingestion/open_local_db.dart'; import 'package:jadb/_data_ingestion/open_local_db.dart';
import 'package:sqflite_common/sqlite_api.dart'; import 'package:sqflite_common/sqlite_api.dart';
Future<Database> setup_database_connection() async { Future<Database> setupDatabaseConnection() async {
final libSqlitePath = Platform.environment['LIBSQLITE_PATH']; final libSqlitePath = Platform.environment['LIBSQLITE_PATH'];
final jadbPath = Platform.environment['JADB_PATH']; final jadbPath = Platform.environment['JADB_PATH'];

View File

@@ -5,43 +5,43 @@ import 'setup_database_connection.dart';
void main() { void main() {
test('Search a word - english - auto', () async { test('Search a word - english - auto', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchWord('kana'); final result = await connection.jadbSearchWord('kana');
expect(result, isNotNull); expect(result, isNotNull);
}); });
test('Get word search count - english - auto', () async { test('Get word search count - english - auto', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchWordCount('kana'); final result = await connection.jadbSearchWordCount('kana');
expect(result, isNotNull); expect(result, isNotNull);
}); });
test('Search a word - japanese kana - auto', () async { test('Search a word - japanese kana - auto', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchWord('かな'); final result = await connection.jadbSearchWord('かな');
expect(result, isNotNull); expect(result, isNotNull);
}); });
test('Get word search count - japanese kana - auto', () async { test('Get word search count - japanese kana - auto', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchWordCount('かな'); final result = await connection.jadbSearchWordCount('かな');
expect(result, isNotNull); expect(result, isNotNull);
}); });
test('Search a word - japanese kanji - auto', () async { test('Search a word - japanese kanji - auto', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchWord('仮名'); final result = await connection.jadbSearchWord('仮名');
expect(result, isNotNull); expect(result, isNotNull);
}); });
test('Get word search count - japanese kanji - auto', () async { test('Get word search count - japanese kanji - auto', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchWordCount('仮名'); final result = await connection.jadbSearchWordCount('仮名');
expect(result, isNotNull); expect(result, isNotNull);
}); });
test('Get a word by id', () async { test('Get a word by id', () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
final result = await connection.jadbGetWordById(1577090); final result = await connection.jadbGetWordById(1577090);
expect(result, isNotNull); expect(result, isNotNull);
}); });
@@ -49,7 +49,7 @@ void main() {
test( test(
'Serialize all words', 'Serialize all words',
() async { () async {
final connection = await setup_database_connection(); final connection = await setupDatabaseConnection();
// Test serializing all words // Test serializing all words
for (final letter in 'aiueoksthnmyrw'.split('')) { for (final letter in 'aiueoksthnmyrw'.split('')) {