1 Commits

Author SHA1 Message Date
oysteikt d168f07563 WIP
Build and test / build (push) Successful in 17m29s
2026-04-13 21:10:56 +09:00
15 changed files with 71 additions and 272 deletions
+3 -2
View File
@@ -16,14 +16,15 @@ jobs:
uses: https://github.com/cachix/install-nix-action@v31
with:
extra_nix_config: |
experimental-features = nix-command flakes
show-trace = true
max-jobs = auto
trusted-users = root
experimental-features = nix-command flakes
build-users-group =
# - name: Update database inputs
# run: nix flake update datasources
- name: Update database inputs
run: nix flake update datasources
- name: Build database
run: nix build .#database -L
Generated
+7 -7
View File
@@ -7,11 +7,11 @@
]
},
"locked": {
"lastModified": 1780302182,
"narHash": "sha256-IfC+dpdjjlkzrWlm+p851T43GsR04wMAPqGn63jisJ4=",
"lastModified": 1776081209,
"narHash": "sha256-zR1115tcOPnYLk6NznSf7YslyaJLc/MGayEHShitx18=",
"ref": "refs/heads/main",
"rev": "c116674dd1e0b879660e6237e54904aa825d4511",
"revCount": 29,
"rev": "7fe3552bb16e1d315c0b27b243e5eb53cd9e86fc",
"revCount": 13,
"type": "git",
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
},
@@ -22,11 +22,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1779560665,
"narHash": "sha256-tpyBcxPpcQb8ukyNF7DoCwfSY3VPsxHoYwj00Cayv5o=",
"lastModified": 1775423009,
"narHash": "sha256-vPKLpjhIVWdDrfiUM8atW6YkIggCEKdSAlJPzzhkQlw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "64c08a7ca051951c8eae34e3e3cb1e202fe36786",
"rev": "68d8aa3d661f0e6bd5862291b5bb263b2a6595c9",
"type": "github"
},
"original": {
+6 -15
View File
@@ -45,7 +45,9 @@ class KanjiElement extends Element {
});
@override
Map<String, Object?> get sqlValue => {...super.sqlValue};
Map<String, Object?> get sqlValue => {
...super.sqlValue,
};
}
class ReadingElement extends Element {
@@ -127,19 +129,6 @@ class XRefParts {
'readingRef': readingRef,
'senseOrderNum': senseOrderNum,
};
/// Whether [other] is the same instance, or another [XRefParts] whose
/// `kanjiRef`, `readingRef` and `senseOrderNum` all equal this one's.
@override
bool operator ==(Object other) =>
    identical(this, other) ||
    (other is XRefParts &&
        other.kanjiRef == kanjiRef &&
        other.readingRef == readingRef &&
        other.senseOrderNum == senseOrderNum);
/// Hash derived from `kanjiRef`, `readingRef` and `senseOrderNum`, in that
/// order.
@override
int get hashCode {
  return Object.hash(kanjiRef, readingRef, senseOrderNum);
}
}
class XRef {
@@ -179,7 +168,9 @@ class Sense extends SQLWritable {
});
@override
Map<String, Object?> get sqlValue => {'senseId': senseId};
Map<String, Object?> get sqlValue => {
'senseId': senseId,
};
bool get isEmpty =>
antonyms.isEmpty &&
+9 -65
View File
@@ -15,12 +15,6 @@ class ResolvedXref {
const ResolvedXref(this.entry, this.ambiguous);
}
/// Hard-coded overrides for xrefs: maps the parts of an xref to the JMdict
/// entry id it should resolve to.
final Map<XRefParts, int> xrefExceptions = <XRefParts, int>{
  // NOTE: see https://www.edrdg.org/jmwsgi/entr.py?svc=jmdict&g=2870981.1~2369718 for details
  XRefParts(kanjiRef: 'プレストレスト', readingRef: 'コンクリート'): 2472380,
};
/// Resolves an xref (pair of kanji, optionally reading, and optionally sense number) to a specific
/// JMdict entry, if possible.
///
@@ -33,65 +27,15 @@ ResolvedXref resolveXref(
SplayTreeMap<String, Set<Entry>> entriesByReading,
XRefParts xref,
) {
late List<Entry> candidateEntries;
if (xrefExceptions.containsKey(xref)) {
final exceptionEntryId = xrefExceptions[xref]!;
// NOTE: this is slow, but we have few exceptions. Let's wait for JMdict XML-NG to be released so we can delete this :)
final exceptionEntry =
entriesByKanji.values
.expand((set) => set)
.firstWhereOrNull((entry) => entry.entryId == exceptionEntryId) ??
entriesByReading.values
.expand((set) => set)
.firstWhereOrNull((entry) => entry.entryId == exceptionEntryId);
if (exceptionEntry != null) {
return ResolvedXref(exceptionEntry, false);
} else {
throw Exception(
'Xref $xref matches an exception entry ID $exceptionEntryId, but that entry was not found among the candidates.',
);
}
}
switch ((xref.kanjiRef, xref.readingRef)) {
case (null, null):
throw Exception('Xref $xref has no kanji or reading reference');
case (final String k, null):
if (!entriesByKanji.containsKey(k)) {
throw Exception(
'Xref $xref has kanji reference "$k" but no entries found with that kanji',
);
}
candidateEntries = entriesByKanji[k]!.toList();
break;
case (null, final String r):
if (!entriesByReading.containsKey(r)) {
throw Exception(
'Xref $xref has reading reference "$r" but no entries found with that reading',
);
}
candidateEntries = entriesByReading[r]!.toList();
break;
case (final String k, final String r):
if (!entriesByKanji.containsKey(k)) {
throw Exception(
'Xref $xref has kanji reference "$k" but no entries found with that kanji',
);
}
if (!entriesByReading.containsKey(r)) {
throw Exception(
'Xref $xref has reading reference "$r" but no entries found with that reading',
);
}
candidateEntries = entriesByKanji[k]!
.intersection(entriesByReading[r]!)
.toList();
}
List<Entry> candidateEntries = switch ((xref.kanjiRef, xref.readingRef)) {
(null, null) => throw Exception(
'Xref $xref has no kanji or reading reference',
),
(final String k, null) => entriesByKanji[k]!.toList(),
(null, final String r) => entriesByReading[r]!.toList(),
(final String k, final String r) =>
entriesByKanji[k]!.intersection(entriesByReading[r]!).toList(),
};
// Filter out entries that don't have the number of senses specified in the xref
if (xref.senseOrderNum != null) {
+3 -9
View File
@@ -4,7 +4,6 @@ import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/search/filter_kanji.dart';
import 'package:jadb/search/kanji_search.dart';
import 'package:jadb/search/radical_search.dart';
import 'package:jadb/search/versions.dart';
import 'package:jadb/search/word_search/word_search.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -19,13 +18,12 @@ extension JaDBConnection on DatabaseExecutor {
searchKanjiWithDbConnection(this, kanji);
/// Search for multiple kanji in the database at once.
Future<Map<String, KanjiSearchResult>> jadbGetManyKanji(
Iterable<String> kanji,
) => searchManyKanjiWithDbConnection(this, kanji);
Future<Map<String, KanjiSearchResult>> jadbGetManyKanji(Set<String> kanji) =>
searchManyKanjiWithDbConnection(this, kanji);
/// Filter a list of characters, and return the ones that are listed in the kanji dictionary.
Future<List<String>> filterKanji(
Iterable<String> kanji, {
List<String> kanji, {
bool deduplicate = false,
}) => filterKanjiWithDbConnection(this, kanji, deduplicate);
@@ -70,8 +68,4 @@ extension JaDBConnection on DatabaseExecutor {
/// of the radicals, and return those.
Future<List<String>> jadbSearchKanjiByRadicals(List<String> radicals) =>
searchKanjiByRadicalsWithDbConnection(this, radicals);
/// Retrieve the version information for all datasources in the database.
Future<DatasourceVersions> jadbGetDatasourceVersions() =>
getDatasourceVersions(this);
}
+2 -2
View File
@@ -6,7 +6,7 @@ import 'package:sqflite_common/sqflite.dart';
/// If [deduplicate] is true, the returned list will deduplicate the input kanji list before returning the filtered results.
Future<List<String>> filterKanjiWithDbConnection(
DatabaseExecutor connection,
Iterable<String> kanji,
List<String> kanji,
bool deduplicate,
) async {
final Set<String> filteredKanji = await connection
@@ -14,7 +14,7 @@ Future<List<String>> filterKanjiWithDbConnection(
SELECT "literal"
FROM "${KANJIDICTableNames.character}"
WHERE "literal" IN (${kanji.map((_) => '?').join(',')})
''', kanji.toList())
''', kanji)
.then((value) => value.map((e) => e['literal'] as String).toSet());
if (deduplicate) {
+1 -1
View File
@@ -274,7 +274,7 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
/// Searches for multiple kanji at once, returning a map of kanji to their search results.
Future<Map<String, KanjiSearchResult>> searchManyKanjiWithDbConnection(
DatabaseExecutor connection,
Iterable<String> kanji,
Set<String> kanji,
) async {
if (kanji.isEmpty) {
return {};
-84
View File
@@ -1,84 +0,0 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
/// Immutable bundle of version metadata for the four datasources
/// (JMdict, KANJIDIC2, RADKFILE and Tanos JLPT).
///
/// Each datasource contributes a version string, a date and a hash.
class DatasourceVersions {
  const DatasourceVersions({
    required this.jmdictVersion,
    required this.jmdictDate,
    required this.jmdictHash,
    required this.kanjidic2Version,
    required this.kanjidic2Date,
    required this.kanjidic2Hash,
    required this.radkfileVersion,
    required this.radkfileDate,
    required this.radkfileHash,
    required this.tanosJlptVersion,
    required this.tanosJlptDate,
    required this.tanosJlptHash,
  });

  // JMdict
  final String jmdictVersion;
  final DateTime jmdictDate;
  final String jmdictHash;

  // KANJIDIC2
  final String kanjidic2Version;
  final DateTime kanjidic2Date;
  final String kanjidic2Hash;

  // RADKFILE
  final String radkfileVersion;
  final DateTime radkfileDate;
  final String radkfileHash;

  // Tanos JLPT
  final String tanosJlptVersion;
  final DateTime tanosJlptDate;
  final String tanosJlptHash;
}
/// Parses [dateString] into a [DateTime], tolerating partial dates.
///
/// The string is first offered to the standard [DateTime] parser. If that
/// rejects it, two shortened forms are accepted:
///
/// * `YYYY-MM`, completed to the first day of that month;
/// * `YYYY`, completed to the first of January of that year.
///
/// Throws a [FormatException] with the message
/// `Invalid date format: <dateString>` if none of these forms match.
DateTime _parseDateTime(String dateString) {
  // tryParse returns null instead of throwing, so the fallback below is
  // reached only for a rejected parse and no unrelated error is swallowed.
  final parsed = DateTime.tryParse(dateString);
  if (parsed != null) return parsed;

  if (RegExp(r'^\d{4}-\d{2}$').hasMatch(dateString)) {
    return DateTime.parse('$dateString-01');
  }
  if (RegExp(r'^\d{4}$').hasMatch(dateString)) {
    return DateTime.parse('$dateString-01-01');
  }
  throw FormatException('Invalid date format: $dateString');
}
/// Reads the version metadata of every datasource through [connection].
///
/// Queries the JMdict, KANJIDIC2, RADKFILE and Tanos JLPT version tables one
/// after another, takes the first row of each, and copies its `version`,
/// `date` and `hash` columns into a [DatasourceVersions].
///
/// Taking the first row throws a [StateError] if a version table is empty.
/// A `date` value that cannot be parsed surfaces as a [FormatException].
Future<DatasourceVersions> getDatasourceVersions(
  final DatabaseExecutor connection,
) async {
  // Fetches all rows of [table] and keeps only the first one.
  Future<Map<String, Object?>> firstRowOf(String table) async {
    final rows = await connection.query(table);
    return rows.first;
  }

  final jmdictRow = await firstRowOf(JMdictTableNames.version);
  final kanjidic2Row = await firstRowOf(KANJIDICTableNames.version);
  final radkfileRow = await firstRowOf(RADKFILETableNames.version);
  final tanosJlptRow = await firstRowOf(TanosJLPTTableNames.version);

  return DatasourceVersions(
    jmdictVersion: jmdictRow['version'] as String,
    jmdictDate: _parseDateTime(jmdictRow['date'].toString()),
    jmdictHash: jmdictRow['hash'] as String,
    kanjidic2Version: kanjidic2Row['version'] as String,
    kanjidic2Date: _parseDateTime(kanjidic2Row['date'].toString()),
    kanjidic2Hash: kanjidic2Row['hash'] as String,
    radkfileVersion: radkfileRow['version'] as String,
    radkfileDate: _parseDateTime(radkfileRow['date'].toString()),
    radkfileHash: radkfileRow['hash'] as String,
    tanosJlptVersion: tanosJlptRow['version'] as String,
    tanosJlptDate: _parseDateTime(tanosJlptRow['date'].toString()),
    tanosJlptHash: tanosJlptRow['hash'] as String,
  );
}
-1
View File
@@ -1 +0,0 @@
const int jadbSchemaVersion = 1;
+1 -1
View File
@@ -229,7 +229,7 @@ CREATE TABLE "JMdict_SenseGlossary" (
PRIMARY KEY ("senseId", "phrase")
) WITHOUT ROWID;
CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
-- CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
CREATE TABLE "JMdict_SenseGlossaryType" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
@@ -1,6 +1,6 @@
CREATE TABLE "JMdict_EntryScore" (
"elementId" INTEGER PRIMARY KEY,
"score" INTEGER NOT NULL,
"score" INTEGER NOT NULL DEFAULT 0,
"common" BOOLEAN NOT NULL DEFAULT FALSE,
"entryId" INTEGER NOT NULL GENERATED ALWAYS AS (("elementId" / 100) % 10000000) STORED,
@@ -15,7 +15,8 @@ CREATE TABLE "JMdict_EntryScore" (
CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score");
CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score");
CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common") WHERE "common" = TRUE;
CREATE INDEX "JMdict_EntryScore_byElementId_byCommon" ON "JMdict_EntryScore"("elementId", "common");
CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common");
-- NOTE: these views are deduplicated in order not to perform an unnecessary
-- UNION on every trigger
+31 -39
View File
@@ -5,18 +5,18 @@ packages:
dependency: transitive
description:
name: _fe_analyzer_shared
sha256: cd6add6f846f35fb79f3c315296703c1a24f3cfd7f4739d91a74961c1c7e9f1b
sha256: "8d718c5c58904f9937290fd5dbf2d6a0e02456867706bfb6cd7b81d394e738d5"
url: "https://pub.dev"
source: hosted
version: "100.0.0"
version: "98.0.0"
analyzer:
dependency: transitive
description:
name: analyzer
sha256: "6ba98576948803398b69e3a444df24eacdbe12ed699c7014e120ea38552debbf"
sha256: "6141ad5d092d1e1d13929c0504658bbeccc1703505830d7c26e859908f5efc88"
url: "https://pub.dev"
source: hosted
version: "13.0.0"
version: "12.0.0"
args:
dependency: "direct main"
description:
@@ -61,10 +61,10 @@ packages:
dependency: transitive
description:
name: code_assets
sha256: bf394f466ba9205f1812a0433b392d6af280f155f56651eda7c18cc32ed493b8
sha256: "83ccdaa064c980b5596c35dd64a8d3ecc68620174ab9b90b6343b753aa721687"
url: "https://pub.dev"
source: hosted
version: "1.2.1"
version: "1.0.0"
collection:
dependency: "direct main"
description:
@@ -149,10 +149,10 @@ packages:
dependency: transitive
description:
name: hooks
sha256: "62ae9bb76d02526c7c2110a19b6e6ad788fe28d35e553e35efb02a41a46ab43a"
sha256: e79ed1e8e1929bc6ecb6ec85f0cb519c887aa5b423705ded0d0f2d9226def388
url: "https://pub.dev"
source: hosted
version: "2.0.1"
version: "1.0.2"
http_multi_server:
dependency: transitive
description:
@@ -197,10 +197,10 @@ packages:
dependency: transitive
description:
name: matcher
sha256: "31bd099b47c10cd1aeb55146a2d46ce0277630ecef3f7dae54ad7873f36696cd"
sha256: dc0b7dc7651697ea4ff3e69ef44b0407ea32c487a39fff6a4004fa585e901861
url: "https://pub.dev"
source: hosted
version: "0.12.20"
version: "0.12.19"
meta:
dependency: transitive
description:
@@ -221,10 +221,10 @@ packages:
dependency: transitive
description:
name: native_toolchain_c
sha256: f59351d28f49520cd3a74eb1f41c5f19ae15e53c65a3231d14af672e46510a96
sha256: "6ba77bb18063eebe9de401f5e6437e95e1438af0a87a3a39084fbd37c90df572"
url: "https://pub.dev"
source: hosted
version: "0.19.1"
version: "0.17.6"
node_preamble:
dependency: transitive
description:
@@ -273,14 +273,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.2.0"
record_use:
dependency: transitive
description:
name: record_use
sha256: "2551bd8eecfe95d14ae75f6021ad0248be5c27f138c2ec12fcb52b500b3ba1ed"
url: "https://pub.dev"
source: hosted
version: "0.6.0"
shelf:
dependency: transitive
description:
@@ -341,26 +333,26 @@ packages:
dependency: "direct main"
description:
name: sqflite_common
sha256: "1581ffbf7a0e333b380d6a30737d78516b826cb35beb7fb0bf8a3ea0c678b465"
sha256: "6ef422a4525ecc601db6c0a2233ff448c731307906e92cabc9ba292afaae16a6"
url: "https://pub.dev"
source: hosted
version: "2.5.8"
version: "2.5.6"
sqflite_common_ffi:
dependency: "direct main"
description:
name: sqflite_common_ffi
sha256: cd0c7f7de39a08f2d54ef144d9058c46eca8461879aaa648025643455c1e5a20
sha256: c59fcdc143839a77581f7a7c4de018e53682408903a0a0800b95ef2dc4033eff
url: "https://pub.dev"
source: hosted
version: "2.4.0+3"
version: "2.4.0+2"
sqlite3:
dependency: "direct main"
description:
name: sqlite3
sha256: "9488c7d2cdb1091c91cacf7e207cff81b28bff8e366f042bad3afe7d34afe189"
sha256: caa693ad15a587a2b4fde093b728131a1827903872171089dedb16f7665d3a91
url: "https://pub.dev"
source: hosted
version: "3.3.2"
version: "3.2.0"
stack_trace:
dependency: transitive
description:
@@ -389,10 +381,10 @@ packages:
dependency: transitive
description:
name: synchronized
sha256: "63896c27e81b28f8cb4e69ead0d3e8f03f1d1e5fc531a3e579cabed6a2c7c9e5"
sha256: c254ade258ec8282947a0acbbc90b9575b4f19673533ee46f2f6e9b3aeefd7c0
url: "https://pub.dev"
source: hosted
version: "3.4.0+1"
version: "3.4.0"
term_glyph:
dependency: transitive
description:
@@ -405,26 +397,26 @@ packages:
dependency: "direct dev"
description:
name: test
sha256: ca578dc12bb8b2f40b67b7d3bd2fac4f31c01a6ff7130a14e2597b919934507f
sha256: "8d9ceddbab833f180fbefed08afa76d7c03513dfdba87ffcec2718b02bbcbf20"
url: "https://pub.dev"
source: hosted
version: "1.31.1"
version: "1.31.0"
test_api:
dependency: transitive
description:
name: test_api
sha256: "2a122cbe059f8b610d3a5415f42e255b6c17b1f21eee1d960f31080237fb4f11"
sha256: "949a932224383300f01be9221c39180316445ecb8e7547f70a41a35bf421fb9e"
url: "https://pub.dev"
source: hosted
version: "0.7.12"
version: "0.7.11"
test_core:
dependency: transitive
description:
name: test_core
sha256: d2e98ec12998368dc59ddd47ab709f2cd55acd6b66dc7db764455a44082f4bc5
sha256: "1991d4cfe85d5043241acac92962c3977c8d2f2add1ee73130c7b286417d1d34"
url: "https://pub.dev"
source: hosted
version: "0.6.18"
version: "0.6.17"
typed_data:
dependency: transitive
description:
@@ -437,10 +429,10 @@ packages:
dependency: transitive
description:
name: vm_service
sha256: "0016aef94fc66495ac78af5859181e3f3bf2026bd8eecc72b9565601e19ab360"
sha256: "45caa6c5917fa127b5dbcfbd1fa60b14e583afdc08bfc96dda38886ca252eb60"
url: "https://pub.dev"
source: hosted
version: "15.2.0"
version: "15.0.2"
watcher:
dependency: transitive
description:
@@ -485,10 +477,10 @@ packages:
dependency: "direct main"
description:
name: xml
sha256: "67f0aff7be013d107995e9b75bf4e7f2c3ef2dfdb2c8e68024bba0a7fd5756a4"
sha256: "971043b3a0d3da28727e40ed3e0b5d18b742fa5a68665cca88e74b7876d5e025"
url: "https://pub.dev"
source: hosted
version: "7.0.1"
version: "6.6.1"
yaml:
dependency: transitive
description:
@@ -498,4 +490,4 @@ packages:
source: hosted
version: "3.1.3"
sdks:
dart: ">=3.11.0 <4.0.0"
dart: ">=3.10.1 <4.0.0"
+1 -1
View File
@@ -15,7 +15,7 @@ dependencies:
sqflite_common: ^2.5.0
sqflite_common_ffi: ^2.3.0
sqlite3: ^3.1.6
xml: '>=6.0.0 < 8.0.0'
xml: ^6.5.0
dev_dependencies:
benchmark_harness: ^2.4.0
-12
View File
@@ -1,12 +0,0 @@
import 'package:jadb/search.dart';
import 'package:test/test.dart';
import 'setup_database_connection.dart';
void main() {
  // Smoke test: only checks that the call completes and returns a non-null
  // value.
  test('Retrieve datasource versions', () async {
    final db = await setupDatabaseConnection();
    final versions = await db.jadbGetDatasourceVersions();

    expect(versions, isNotNull);
  });
}
+4 -31
View File
@@ -8,7 +8,7 @@ void main() {
expect(result, 'かたまり');
});
test('Basic test with dakuten', () {
test('Basic test with diacritics', () {
final result = transliterateLatinToHiragana('gadamari');
expect(result, 'がだまり');
});
@@ -54,7 +54,7 @@ void main() {
test('Basic test', expectSpans('katamari', ['', '', '', '']));
test(
'Basic test with dakuten',
'Basic test with diacritics',
expectSpans('gadamari', ['', '', '', '']),
);
test('wi and we', expectSpans('wiwe', ['うぃ', 'うぇ']));
@@ -72,7 +72,7 @@ void main() {
expect(result, 'katamari');
});
test('Basic test with dakuten', () {
test('Basic test with diacritics', () {
final result = transliterateHiraganaToLatin('がだまり');
expect(result, 'gadamari');
});
@@ -91,21 +91,6 @@ void main() {
final result = transliterateHiraganaToLatin('かっぱ');
expect(result, 'kappa');
});
test('Iteration mark', () {
final result = transliterateHiraganaToLatin('さゝき');
expect(result, 'sasaki');
}, skip: 'Not yet implemented');
test('Iteration mark with dakuten', () {
final result = transliterateHiraganaToLatin('あひゞき');
expect(result, 'ahibiki');
}, skip: 'Not yet implemented');
test('Yori', () {
final result = transliterateHiraganaToLatin('');
expect(result, 'yori');
}, skip: 'Not yet implemented');
});
group('Hiragana -> Romaji Spans', () {
@@ -125,7 +110,7 @@ void main() {
test('Basic test', expectSpans('かたまり', ['ka', 'ta', 'ma', 'ri']));
test(
'Basic test with dakuten',
'Basic test with diacritics',
expectSpans('がだまり', ['ga', 'da', 'ma', 'ri']),
);
test('wi and we', expectSpans('うぃうぇ', ['whi', 'whe']));
@@ -133,17 +118,5 @@ void main() {
// TODO: fix the implementation
// test('Double consonant', expectSpans('かっぱ', ['ka', 'ppa']));
test(
'Iteration mark',
expectSpans('さゝき', ['sa', 'sa', 'ki']),
skip: 'Not yet implemented',
);
test(
'Iteration mark with dakuten',
expectSpans('あひゞき', ['a', 'hi', 'bi', 'ki']),
skip: 'Not yet implemented',
);
test('Yori', expectSpans('', ['yori']), skip: 'Not yet implemented');
});
}