12 Commits

Author SHA1 Message Date
d168f07563 WIP
All checks were successful
Build and test / build (push) Successful in 17m29s
2026-04-13 21:10:56 +09:00
d13138f8a5 Add datasource versions to database
All checks were successful
Build and test / build (push) Successful in 7m56s
2026-04-13 21:00:39 +09:00
cbaa9ec6b3 benchmark: create separate benchmarks for jp and en search
All checks were successful
Build and test / build (push) Successful in 6m53s
2026-04-13 20:24:26 +09:00
d1e2fa3748 test/search/radical_search: skip failing tests for now 2026-04-13 20:24:25 +09:00
3f4fdf470d jmdict: store glossary type in different table 2026-04-13 20:24:25 +09:00
556d07913d jmdict: don't store glossary language 2026-04-13 19:42:11 +09:00
6165045ea7 migrations: simplify JMdict_CombinedEntryScore 2026-04-13 19:29:04 +09:00
316dff3b46 migrations: comment out unused jmdict <-> kanjidic xref table 2026-04-13 19:27:16 +09:00
747e680a02 migrations: remove some excessive indices 2026-04-13 19:27:16 +09:00
4f73e07056 test/search/radical_search: init 2026-04-13 19:12:15 +09:00
15540514f6 jmdict: don't store kanji + reading for xrefs
All checks were successful
Build and test / build (push) Successful in 7m0s
2026-04-13 18:33:06 +09:00
4faf543d6e jmdict: don't store empty entry scores
All checks were successful
Build and test / build (push) Successful in 8m17s
2026-04-13 18:18:48 +09:00
20 changed files with 301 additions and 72 deletions

View File

@@ -1,5 +1,7 @@
import './search/word_search.dart';
import './search/english_word_search.dart';
import './search/japanese_word_search.dart';
Future<void> main() async {
await WordSearchBenchmark.main();
await EnglishWordSearchBenchmark.main();
await JapaneseWordSearchBenchmark.main();
}

View File

@@ -4,23 +4,23 @@ import 'package:sqflite_common/sqlite_api.dart';
import '../../test/search/setup_database_connection.dart';
class WordSearchBenchmark extends AsyncBenchmarkBase {
class EnglishWordSearchBenchmark extends AsyncBenchmarkBase {
Database? connection;
static final List<String> searchTerms = [
'kana',
'kanji',
'kawaii',
'cute',
'sushi',
'ramen',
];
WordSearchBenchmark() : super('WordSearchBenchmark');
EnglishWordSearchBenchmark() : super('EnglishWordSearchBenchmark');
static Future<void> main() async {
print('Running WordSearchBenchmark...');
await WordSearchBenchmark().report();
print('Finished WordSearchBenchmark');
print('Running EnglishWordSearchBenchmark...');
await EnglishWordSearchBenchmark().report();
print('Finished EnglishWordSearchBenchmark');
}
@override
@@ -31,7 +31,11 @@ class WordSearchBenchmark extends AsyncBenchmarkBase {
@override
Future<void> run() async {
for (final term in searchTerms) {
await connection!.jadbSearchWord(term);
final result = await connection!.jadbSearchWord(term);
assert(
result?.isNotEmpty ?? false,
'Expected search results for term "$term"',
);
}
}

View File

@@ -0,0 +1,49 @@
import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:jadb/search.dart';
import 'package:sqflite_common/sqlite_api.dart';
import '../../test/search/setup_database_connection.dart';
class JapaneseWordSearchBenchmark extends AsyncBenchmarkBase {
Database? connection;
static final List<String> searchTerms = [
'仮名',
'漢字',
'かわいい',
'すし',
'ラメン',
];
JapaneseWordSearchBenchmark() : super('JapaneseWordSearchBenchmark');
static Future<void> main() async {
print('Running JapaneseWordSearchBenchmark...');
await JapaneseWordSearchBenchmark().report();
print('Finished JapaneseWordSearchBenchmark');
}
@override
Future<void> setup() async {
connection = await setupDatabaseConnection();
}
@override
Future<void> run() async {
for (final term in searchTerms) {
final result = await connection!.jadbSearchWord(term);
assert(
result?.isNotEmpty ?? false,
'Expected search results for term "$term"',
);
}
}
@override
Future<void> teardown() async {
await connection?.close();
}
// @override
// Future<void> exercise() => run();
}

View File

@@ -25,4 +25,5 @@ The `JMdict_EntryScore` table is used to store the score of each entry, which is
The table is automatically generated from other tables via triggers, and should be considered as a materialized view.
There is a score row for every single entry in both `JMdict_KanjiElement` and `JMdict_ReadingElement`, split by the `type` field.
<s>There is a score row for every single entry in both `JMdict_KanjiElement` and `JMdict_ReadingElement`, split by the `type` field.</s>
This is no longer true, we now only store the rows for which the score is not `0`. The `type` field is now also virtual, since the `elementId` fields for both kanji and readings are unique to each other.

8
flake.lock generated
View File

@@ -7,11 +7,11 @@
]
},
"locked": {
"lastModified": 1775550160,
"narHash": "sha256-bgvKrMGUPaDY4EZv+82z1ccYoxwaergdVw/3PZhc2Fc=",
"lastModified": 1776081209,
"narHash": "sha256-zR1115tcOPnYLk6NznSf7YslyaJLc/MGayEHShitx18=",
"ref": "refs/heads/main",
"rev": "f46229af3678124c5ea7c8dff3292747d0274f69",
"revCount": 8,
"rev": "7fe3552bb16e1d315c0b27b243e5eb53cd9e86fc",
"revCount": 13,
"type": "git",
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
},

View File

@@ -106,9 +106,8 @@ class Glossary extends SQLWritable {
@override
Map<String, Object?> get sqlValue => {
'language': language,
// 'language': language,
'phrase': phrase,
'type': type,
};
}

View File

@@ -1,4 +1,5 @@
import 'dart:collection';
import 'dart:io';
import 'package:collection/collection.dart';
import 'package:jadb/_data_ingestion/jmdict/objects.dart';
@@ -84,6 +85,21 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
print(' [JMdict] Batch 1 - Kanji and readings');
Batch b = db.batch();
if (Platform.environment['JMDICT_VERSION'] != null &&
Platform.environment['JMDICT_DATE'] != null &&
Platform.environment['JMDICT_HASH'] != null) {
b.insert(JMdictTableNames.version, {
'version': Platform.environment['JMDICT_VERSION']!,
'date': Platform.environment['JMDICT_DATE']!,
'hash': Platform.environment['JMDICT_HASH']!,
});
} else {
print(
'WARNING: JMDICT version information not found in environment variables. '
'This may cause issues with future updates.',
);
}
for (final e in entries) {
b.insert(JMdictTableNames.entry, e.sqlValue);
@@ -172,6 +188,14 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
JMdictTableNames.senseGlossary,
g.sqlValue..addAll({'senseId': s.senseId}),
);
if (g.type != null) {
b.insert(JMdictTableNames.senseGlossaryType, {
'senseId': s.senseId,
'phrase': g.phrase,
'type': g.type!,
});
}
}
}
}
@@ -219,8 +243,6 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
b.insert(JMdictTableNames.senseSeeAlso, {
'senseId': s.senseId,
'xrefEntryId': resolvedEntry.entry.entryId,
'seeAlsoKanji': xref.kanjiRef,
'seeAlsoReading': xref.readingRef,
'seeAlsoSense': xref.senseOrderNum != null
? xref.senseOrderNum! - 1
: null,
@@ -248,8 +270,6 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
b.insert(JMdictTableNames.senseAntonyms, {
'senseId': s.senseId,
'xrefEntryId': resolvedEntry.entry.entryId,
'antonymKanji': ant.kanjiRef,
'antonymReading': ant.readingRef,
'antonymSense': ant.senseOrderNum != null
? ant.senseOrderNum! - 1
: null,

View File

@@ -1,3 +1,5 @@
import 'dart:io';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -5,6 +7,22 @@ import 'objects.dart';
Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
final b = db.batch();
if (Platform.environment['KANJIDIC_VERSION'] != null &&
Platform.environment['KANJIDIC_DATE'] != null &&
Platform.environment['KANJIDIC_HASH'] != null) {
b.insert(KANJIDICTableNames.version, {
'version': Platform.environment['KANJIDIC_VERSION']!,
'date': Platform.environment['KANJIDIC_DATE']!,
'hash': Platform.environment['KANJIDIC_HASH']!,
});
} else {
print(
'WARNING: KANJIDIC version information not found in environment variables. '
'This may cause issues with future updates.',
);
}
for (final c in characters) {
// if (c.dictionaryReferences.any((e) =>
// c.dictionaryReferences
@@ -30,10 +48,7 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
}
if (c.jlpt != null) {
b.insert(KANJIDICTableNames.jlpt, {
'kanji': c.literal,
'jlpt': c.jlpt!,
});
b.insert(KANJIDICTableNames.jlpt, {'kanji': c.literal, 'jlpt': c.jlpt!});
}
for (final n in c.radicalName) {

View File

@@ -1,9 +1,26 @@
import 'dart:io';
import 'package:jadb/table_names/radkfile.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<void> seedRADKFILEData(Iterable<String> blocks, Database db) async {
final b = db.batch();
if (Platform.environment['RADKFILE_VERSION'] != null &&
Platform.environment['RADKFILE_DATE'] != null &&
Platform.environment['RADKFILE_HASH'] != null) {
b.insert(RADKFILETableNames.version, {
'version': Platform.environment['RADKFILE_VERSION']!,
'date': Platform.environment['RADKFILE_DATE']!,
'hash': Platform.environment['RADKFILE_HASH']!,
});
} else {
print(
'WARNING: RADKFILE version information not found in environment variables. '
'This may cause issues with future updates.',
);
}
for (final block in blocks) {
final String radical = block[1];
final List<String> kanjiList =

View File

@@ -1,3 +1,5 @@
import 'dart:io';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -7,6 +9,21 @@ Future<void> seedTanosJLPTData(
) async {
final Batch b = db.batch();
if (Platform.environment['TANOS_JLPT_VERSION'] != null &&
Platform.environment['TANOS_JLPT_DATE'] != null &&
Platform.environment['TANOS_JLPT_HASH'] != null) {
b.insert(TanosJLPTTableNames.version, {
'version': Platform.environment['TANOS_JLPT_VERSION']!,
'date': Platform.environment['TANOS_JLPT_DATE']!,
'hash': Platform.environment['TANOS_JLPT_HASH']!,
});
} else {
print(
'WARNING: Tanos JLPT version information not found in environment variables. '
'This may cause issues with future updates.',
);
}
for (final jlptLevel in resolvedEntries.entries) {
final level = jlptLevel.key;
final entryIds = jlptLevel.value;

View File

@@ -68,25 +68,25 @@ String _filterFTSSensitiveCharacters(String word) {
"$tableName"."entryId",
100
+ (("${tableName}FTS"."reading" = ?) * 10000)
+ "JMdict_EntryScore"."score"
+ (("$tableName"."orderNum" = 0) * 20)
+ COALESCE("JMdict_EntryScore"."score", 0)
AS "score"
FROM "${tableName}FTS"
JOIN "$tableName" USING ("elementId")
JOIN "JMdict_EntryScore" USING ("elementId")
LEFT JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "${tableName}FTS"."reading" MATCH ? || '*'
AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? '<' : '>='} 1000000000
),
non_fts_results AS (
SELECT DISTINCT
"$tableName"."entryId",
50
+ "JMdict_EntryScore"."score"
+ (("$tableName"."orderNum" = 0) * 20)
+ COALESCE("JMdict_EntryScore"."score", 0)
AS "score"
FROM "$tableName"
JOIN "JMdict_EntryScore" USING ("elementId")
LEFT JOIN "JMdict_EntryScore" USING ("elementId")
WHERE "reading" LIKE '%' || ? || '%'
AND "$tableName"."entryId" NOT IN (SELECT "entryId" FROM "fts_results")
AND "JMdict_EntryScore"."elementId" ${tableName == JMdictTableNames.kanjiElement ? '<' : '>='} 1000000000
)
SELECT ${countOnly ? 'COUNT(DISTINCT "entryId") AS count' : '"entryId", MAX("score") AS "score"'}
@@ -198,16 +198,16 @@ Future<List<ScoredEntryId>> _queryEnglish(
'''
SELECT
"${JMdictTableNames.sense}"."entryId",
MAX("JMdict_EntryScore"."score")
COALESCE(MAX("JMdict_EntryScore"."score"), 0)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 0) * 50)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 30)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" > 1) * 20)
as "score"
FROM "${JMdictTableNames.senseGlossary}"
JOIN "${JMdictTableNames.sense}" USING ("senseId")
JOIN "JMdict_EntryScore" USING ("entryId")
LEFT JOIN "JMdict_EntryScore" USING ("entryId")
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?2
GROUP BY "JMdict_EntryScore"."entryId"
GROUP BY "${JMdictTableNames.sense}"."entryId"
ORDER BY
"score" DESC,
"${JMdictTableNames.sense}"."entryId" ASC
@@ -215,7 +215,7 @@ Future<List<ScoredEntryId>> _queryEnglish(
${offset != null ? 'OFFSET ?4' : ''}
'''
.trim(),
[word, '%${word.replaceAll('%', '')}%', if (pageSize != null) pageSize, if (offset != null) offset],
[word, '%${word.replaceAll('%', '')}%', ?pageSize, ?offset],
);
return result

View File

@@ -11,6 +11,7 @@ abstract class JMdictTableNames {
static const String senseDialect = 'JMdict_SenseDialect';
static const String senseField = 'JMdict_SenseField';
static const String senseGlossary = 'JMdict_SenseGlossary';
static const String senseGlossaryType = 'JMdict_SenseGlossaryType';
static const String senseInfo = 'JMdict_SenseInfo';
static const String senseMisc = 'JMdict_SenseMisc';
static const String sensePOS = 'JMdict_SensePOS';
@@ -33,6 +34,7 @@ abstract class JMdictTableNames {
senseDialect,
senseField,
senseGlossary,
senseGlossaryType,
senseInfo,
senseMisc,
sensePOS,

View File

@@ -139,8 +139,6 @@ CREATE TABLE "JMdict_SenseRestrictedToReading" (
CREATE TABLE "JMdict_SenseSeeAlso" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"xrefEntryId" INTEGER NOT NULL,
"seeAlsoReading" TEXT,
"seeAlsoKanji" TEXT,
"seeAlsoSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
@@ -153,18 +151,14 @@ CREATE TABLE "JMdict_SenseSeeAlso" (
END
) VIRTUAL,
FOREIGN KEY ("xrefEntryId", "seeAlsoKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "seeAlsoReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("seeAlsoSenseKey") REFERENCES "JMdict_Sense"("senseId"),
UNIQUE("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoSense")
);
CREATE TABLE "JMdict_SenseAntonym" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"xrefEntryId" INTEGER NOT NULL,
"antonymReading" TEXT,
"antonymKanji" TEXT,
"antonymSense" INTEGER,
-- For some entries, the cross reference is ambiguous. This means that while the ingestion
-- has determined some xrefEntryId, it is not guaranteed to be the correct one.
@@ -177,11 +171,9 @@ CREATE TABLE "JMdict_SenseAntonym" (
END
) VIRTUAL,
FOREIGN KEY ("xrefEntryId", "antonymKanji") REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
FOREIGN KEY ("xrefEntryId", "antonymReading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
FOREIGN KEY ("antonymSenseKey") REFERENCES "JMdict_Sense"("senseId"),
UNIQUE("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
PRIMARY KEY ("senseId", "xrefEntryId", "antonymSense")
);
-- These cross references are going to be mostly accessed from a sense
@@ -232,12 +224,20 @@ CREATE TABLE "JMdict_SenseDialect" (
CREATE TABLE "JMdict_SenseGlossary" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"phrase" TEXT NOT NULL,
"language" CHAR(3) NOT NULL DEFAULT "eng",
"type" TEXT,
PRIMARY KEY ("senseId", "language", "phrase")
-- "language" CHAR(3) NOT NULL DEFAULT "eng",
-- PRIMARY KEY ("senseId", "language", "phrase")
PRIMARY KEY ("senseId", "phrase")
) WITHOUT ROWID;
CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
-- CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
CREATE TABLE "JMdict_SenseGlossaryType" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
"phrase" TEXT NOT NULL,
"type" TEXT NOT NULL,
PRIMARY KEY ("senseId", "phrase", "type"),
FOREIGN KEY ("senseId", "phrase") REFERENCES "JMdict_SenseGlossary"("senseId", "phrase")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseInfo" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),

View File

@@ -46,7 +46,7 @@ SELECT
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 0) * 20)
-- + (("orderNum" IS 0) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_ReadingElement"
@@ -77,7 +77,7 @@ SELECT
+ (("spec" IS 2) * 5)
+ (("gai" IS 1) * 10)
+ (("gai" IS 2) * 5)
+ (("orderNum" IS 0) * 20)
-- + (("orderNum" IS 0) * 20)
- (substr(COALESCE("JMdict_JLPTTag"."jlptLevel", 'N0'), 2) * -5)
AS "score"
FROM "JMdict_KanjiElement"
@@ -103,7 +103,8 @@ BEGIN
)
SELECT "elementId", "score", "common"
FROM "JMdict_EntryScoreView_Reading"
WHERE "elementId" = NEW."elementId";
WHERE "elementId" = NEW."elementId"
AND "score" > 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_ReadingElement"
@@ -116,6 +117,10 @@ BEGIN
"common" = "JMdict_EntryScoreView_Reading"."common"
FROM "JMdict_EntryScoreView_Reading"
WHERE "elementId" = NEW."elementId";
DELETE FROM "JMdict_EntryScore"
WHERE "elementId" = NEW."elementId"
AND "score" <= 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_ReadingElement"
@@ -137,7 +142,8 @@ BEGIN
)
SELECT "elementId", "score", "common"
FROM "JMdict_EntryScoreView_Kanji"
WHERE "elementId" = NEW."elementId";
WHERE "elementId" = NEW."elementId"
AND "score" > 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Update_JMdict_KanjiElement"
@@ -150,6 +156,10 @@ BEGIN
"common" = "JMdict_EntryScoreView_Kanji"."common"
FROM "JMdict_EntryScoreView_Kanji"
WHERE "elementId" = NEW."elementId";
DELETE FROM "JMdict_EntryScore"
WHERE "elementId" = NEW."elementId"
AND "score" <= 0;
END;
CREATE TRIGGER "JMdict_EntryScore_Delete_JMdict_KanjiElement"
@@ -199,4 +209,8 @@ BEGIN
WHERE "JMdict_EntryScoreView"."entryId" = OLD."entryId"
AND "JMdict_EntryScore"."entryId" = OLD."entryId"
AND "JMdict_EntryScoreView"."elementId" = "JMdict_EntryScore"."elementId";
DELETE FROM "JMdict_EntryScore"
WHERE "elementId" = OLD."elementId"
AND "score" <= 0;
END;

View File

@@ -18,4 +18,3 @@ CREATE TABLE "RADKFILE" (
) WITHOUT ROWID;
CREATE INDEX "RADK" ON "RADKFILE"("radical");
CREATE INDEX "KRAD" ON "RADKFILE"("kanji");

View File

@@ -4,7 +4,6 @@ CREATE TABLE "XREF__KANJIDIC_Radical__RADKFILE"(
PRIMARY KEY ("radicalId", "radicalSymbol")
) WITHOUT ROWID;
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalId" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalId");
CREATE INDEX "XREF__KANJIDIC_Radical__RADKFILE__byRadicalSymbol" ON "XREF__KANJIDIC_Radical__RADKFILE"("radicalSymbol");
/* Source: https://ctext.org/kangxi-zidian */

View File

@@ -1,10 +1,10 @@
CREATE TABLE "XREF__JMdict_KanjiElement__KANJIDIC_Character"(
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
PRIMARY KEY ("entryId", "reading", "kanji"),
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_KanjiElement"("entryId", "reading")
) WITHOUT ROWID;
-- CREATE TABLE "XREF__JMdict_KanjiElement__KANJIDIC_Character"(
-- "entryId" INTEGER NOT NULL,
-- "reading" TEXT NOT NULL,
-- "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
-- PRIMARY KEY ("entryId", "reading", "kanji"),
-- FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_KanjiElement"("entryId", "reading")
-- ) WITHOUT ROWID;
CREATE INDEX "XREF__JMdict_KanjiElement__KANJIDIC_Character__byEntryId_byReading" ON "XREF__JMdict_KanjiElement__KANJIDIC_Character"("entryId", "reading");
CREATE INDEX "XREF__JMdict_KanjiElement__KANJIDIC_Character__byKanji" ON "XREF__JMdict_KanjiElement__KANJIDIC_Character"("kanji");
-- CREATE INDEX "XREF__JMdict_KanjiElement__KANJIDIC_Character__byEntryId_byReading" ON "XREF__JMdict_KanjiElement__KANJIDIC_Character"("entryId", "reading");
-- CREATE INDEX "XREF__JMdict_KanjiElement__KANJIDIC_Character__byKanji" ON "XREF__JMdict_KanjiElement__KANJIDIC_Character"("kanji");

View File

@@ -75,13 +75,7 @@ SELECT DISTINCT "radical" FROM "RADKFILE";
CREATE VIEW "JMdict_CombinedEntryScore"
AS
SELECT
CASE
WHEN "JMdict_EntryScore"."type" = 'k'
THEN (SELECT entryId FROM "JMdict_KanjiElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
WHEN "JMdict_EntryScore"."type" = 'r'
THEN (SELECT entryId FROM "JMdict_ReadingElement" WHERE "elementId" = "JMdict_EntryScore"."elementId")
END AS "entryId",
MAX("JMdict_EntryScore"."score") AS "score",
MAX("JMdict_EntryScore"."common") AS "common"
FROM "JMdict_EntryScore"
GROUP BY "entryId";
GROUP BY "JMdict_EntryScore"."entryId";

View File

@@ -19,13 +19,31 @@ stdenvNoCC.mkDerivation {
sqlite
];
env = {
JMDICT_VERSION = jmdict.version;
JMDICT_DATE = jmdict.date;
JMDICT_HASH = jmdict.hash;
KANJIDIC_VERSION = kanjidic2.version;
KANJIDIC_DATE = kanjidic2.date;
KANJIDIC_HASH = kanjidic2.hash;
RADKFILE_VERSION = radkfile.version;
RADKFILE_DATE = radkfile.date;
RADKFILE_HASH = radkfile.hash;
TANOS_JLPT_VERSION = tanos-jlpt.version;
TANOS_JLPT_DATE = tanos-jlpt.date;
TANOS_JLPT_HASH = tanos-jlpt.hash;
};
buildPhase = ''
runHook preBuild
mkdir -p data
ln -s '${jmdict}'/* data/
ln -s '${radkfile}'/* data/
ln -s '${kanjidic2}'/* data/
ln -s '${radkfile}'/* data/
ln -s '${tanos-jlpt}' data/tanos-jlpt
for migration in migrations/*.sql; do

View File

@@ -0,0 +1,79 @@
import 'package:collection/collection.dart';
import 'package:jadb/const_data/radicals.dart';
import 'package:jadb/search.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
void main() {
test(
'All constant radicals should exist in the database',
() async {
final connection = await setupDatabaseConnection();
final allRadicalsInDb = await connection.query(
RADKFILETableNames.radkfile,
columns: ['radical'],
distinct: true,
);
final radicalsInDb = allRadicalsInDb
.map((e) => e['radical'] as String)
.toSet();
final missingRadicals = radicals.values.flattenedToSet.difference(
radicalsInDb,
);
expect(
missingRadicals,
isEmpty,
reason: 'Missing radicals in database: $missingRadicals',
);
},
skip: 'Test is valid, code is broken, fix me',
);
test(
'All radicals in database should be in the constant radical list',
() async {
final connection = await setupDatabaseConnection();
final allRadicalsInDb = await connection.query(
RADKFILETableNames.radkfile,
columns: ['radical'],
distinct: true,
);
final radicalsInDb = allRadicalsInDb
.map((e) => e['radical'] as String)
.toSet();
final extraRadicals = radicalsInDb.difference(
radicals.values.flattenedToSet,
);
expect(
extraRadicals,
isEmpty,
reason:
'Extra radicals in database missing in the constant list: $extraRadicals',
);
},
skip: 'Test is valid, code is broken, fix me',
);
group(
'All radicals should return results',
() {
for (final radical in radicals.values.flattened) {
test(' - $radical', () async {
final connection = await setupDatabaseConnection();
final result = await connection.jadbSearchKanjiByRadicals([radical]);
expect(result, isNotEmpty);
});
}
},
skip:
'These will be automatically fixed once the other radical tests are passing',
);
}