lib/_data_ingestion: improve openLocalDB

This commit is contained in:
2025-04-30 09:57:43 +02:00
parent 1fdcb7a477
commit 39f5826eaa
12 changed files with 186 additions and 100 deletions

View File

@@ -1,29 +1,5 @@
import 'package:jadb/_data_ingestion/sql_writable.dart';
/// Table-name constants referenced when inserting JMdict rows.
///
/// The class is never instantiated; it only namespaces the constants.
abstract class TableNames {
  // Entry tables.
  static const entry = 'JMdict_Entry';
  static const entryByKana = 'JMdict_EntryByKana';
  static const entryByEnglish = 'JMdict_EntryByEnglish';

  // Kanji element tables.
  static const kanjiElement = 'JMdict_KanjiElement';
  static const kanjiInfo = 'JMdict_KanjiElementInfo';

  // Reading element tables.
  static const readingElement = 'JMdict_ReadingElement';
  static const readingInfo = 'JMdict_ReadingElementInfo';
  static const readingRestriction = 'JMdict_ReadingElementRestriction';

  // Sense tables.
  static const sense = 'JMdict_Sense';
  static const senseAntonyms = 'JMdict_SenseAntonym';
  static const senseDialect = 'JMdict_SenseDialect';
  static const senseField = 'JMdict_SenseField';
  static const senseGlossary = 'JMdict_SenseGlossary';
  static const senseInfo = 'JMdict_SenseInfo';
  static const senseMisc = 'JMdict_SenseMisc';
  static const sensePOS = 'JMdict_SensePOS';
  static const senseLanguageSource = 'JMdict_SenseLanguageSource';
  static const senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji';
  static const senseRestrictedToReading = 'JMdict_SenseRestrictedToReading';
  static const senseSeeAlso = 'JMdict_SenseSeeAlso';
}
abstract class Element extends SQLWritable {
final String reading;
final int? news;

View File

@@ -2,6 +2,7 @@ import 'dart:collection';
import 'package:collection/collection.dart';
import 'package:jadb/_data_ingestion/jmdict/objects.dart';
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/util/romaji_transliteration.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -72,18 +73,18 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
print(' [JMdict] Batch 1 - Kanji and readings');
Batch b = db.batch();
for (final e in entries) {
b.insert(TableNames.entry, e.sqlValue);
b.insert(JMdictTableNames.entry, e.sqlValue);
for (final k in e.kanji) {
b.insert(TableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id}));
b.insert(JMdictTableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id}));
// b.insert(
// TableNames.entryByKana,
// JMdictTableNames.entryByKana,
// {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)},
// // Some entries have the same reading twice with difference in katakana and hiragana
// conflictAlgorithm: ConflictAlgorithm.ignore,
// );
for (final i in k.info) {
b.insert(
TableNames.kanjiInfo,
JMdictTableNames.kanjiInfo,
{
'entryId': e.id,
'reading': k.reading,
@@ -94,12 +95,12 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
}
for (final r in e.readings) {
b.insert(
TableNames.readingElement,
JMdictTableNames.readingElement,
r.sqlValue..addAll({'entryId': e.id}),
);
b.insert(
TableNames.entryByKana,
JMdictTableNames.entryByKana,
{
'entryId': e.id,
'kana': transliterateKanaToLatin(r.reading),
@@ -109,7 +110,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
);
for (final i in r.info) {
b.insert(
TableNames.readingInfo,
JMdictTableNames.readingInfo,
{
'entryId': e.id,
'reading': r.reading,
@@ -119,7 +120,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
}
for (final res in r.restrictions) {
b.insert(
TableNames.readingRestriction,
JMdictTableNames.readingRestriction,
{
'entryId': e.id,
'reading': r.reading,
@@ -132,7 +133,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
for (final s in e.senses) {
for (final g in s.glossary) {
b.insert(
TableNames.entryByEnglish,
JMdictTableNames.entryByEnglish,
{
'entryId': e.id,
'english': g.phrase,
@@ -150,49 +151,49 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
b = db.batch();
for (final e in entries) {
for (final s in e.senses) {
b.insert(TableNames.sense, s.sqlValue..addAll({'entryId': e.id}));
b.insert(JMdictTableNames.sense, s.sqlValue..addAll({'entryId': e.id}));
for (final d in s.dialects) {
b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d});
b.insert(JMdictTableNames.senseDialect, {'senseId': s.id, 'dialect': d});
}
for (final f in s.fields) {
b.insert(TableNames.senseField, {'senseId': s.id, 'field': f});
b.insert(JMdictTableNames.senseField, {'senseId': s.id, 'field': f});
}
for (final i in s.info) {
b.insert(TableNames.senseInfo, {'senseId': s.id, 'info': i});
b.insert(JMdictTableNames.senseInfo, {'senseId': s.id, 'info': i});
}
for (final m in s.misc) {
b.insert(TableNames.senseMisc, {'senseId': s.id, 'misc': m});
b.insert(JMdictTableNames.senseMisc, {'senseId': s.id, 'misc': m});
}
for (final p in s.pos) {
b.insert(TableNames.sensePOS, {'senseId': s.id, 'pos': p});
b.insert(JMdictTableNames.sensePOS, {'senseId': s.id, 'pos': p});
}
for (final l in s.languageSource) {
b.insert(
TableNames.senseLanguageSource,
JMdictTableNames.senseLanguageSource,
l.sqlValue..addAll({'senseId': s.id}),
);
}
for (final rk in s.restrictedToKanji) {
b.insert(
TableNames.senseRestrictedToKanji,
JMdictTableNames.senseRestrictedToKanji,
{'entryId': e.id, 'senseId': s.id, 'kanji': rk},
);
}
for (final rr in s.restrictedToReading) {
b.insert(
TableNames.senseRestrictedToReading,
JMdictTableNames.senseRestrictedToReading,
{'entryId': e.id, 'senseId': s.id, 'reading': rr},
);
}
for (final ls in s.languageSource) {
b.insert(
TableNames.senseLanguageSource,
JMdictTableNames.senseLanguageSource,
ls.sqlValue..addAll({'senseId': s.id}),
);
}
for (final g in s.glossary) {
b.insert(
TableNames.senseGlossary,
JMdictTableNames.senseGlossary,
g.sqlValue..addAll({'senseId': s.id}),
);
}
@@ -236,7 +237,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
);
b.insert(
TableNames.senseSeeAlso,
JMdictTableNames.senseSeeAlso,
{
'senseId': s.id,
'xrefEntryId': resolvedEntry.entry.id,
@@ -255,7 +256,7 @@ Future<void> seedJMDictData(List<Entry> entries, Database db) async {
ant,
);
b.insert(TableNames.senseAntonyms, {
b.insert(JMdictTableNames.senseAntonyms, {
'senseId': s.id,
'xrefEntryId': resolvedEntry.entry.id,
'antonymKanji': ant.kanjiRef,

View File

@@ -0,0 +1,46 @@
/// Names of the database tables that hold JMdict data.
///
/// The class is never instantiated; it only namespaces the constants.
abstract class JMdictTableNames {
  static const String entry = 'JMdict_Entry';
  static const String entryByKana = 'JMdict_EntryByKana';
  static const String entryByEnglish = 'JMdict_EntryByEnglish';
  static const String kanjiElement = 'JMdict_KanjiElement';
  static const String kanjiInfo = 'JMdict_KanjiElementInfo';
  static const String readingElement = 'JMdict_ReadingElement';
  static const String readingInfo = 'JMdict_ReadingElementInfo';
  static const String readingRestriction = 'JMdict_ReadingElementRestriction';
  static const String sense = 'JMdict_Sense';
  static const String senseAntonyms = 'JMdict_SenseAntonym';
  static const String senseDialect = 'JMdict_SenseDialect';
  static const String senseField = 'JMdict_SenseField';
  static const String senseGlossary = 'JMdict_SenseGlossary';
  static const String senseInfo = 'JMdict_SenseInfo';
  static const String senseMisc = 'JMdict_SenseMisc';
  static const String sensePOS = 'JMdict_SensePOS';
  static const String senseLanguageSource = 'JMdict_SenseLanguageSource';
  static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji';
  static const String senseRestrictedToReading =
      'JMdict_SenseRestrictedToReading';
  static const String senseSeeAlso = 'JMdict_SenseSeeAlso';

  /// Every table name declared in this class.
  ///
  /// Declared as a compile-time constant so that repeated reads share one
  /// canonical, unmodifiable set instead of allocating a new set on each
  /// access. A duplicated name would also be rejected at compile time.
  static const Set<String> allTables = {
    entry,
    entryByKana,
    entryByEnglish,
    kanjiElement,
    kanjiInfo,
    readingElement,
    readingInfo,
    readingRestriction,
    sense,
    senseAntonyms,
    senseDialect,
    senseField,
    senseGlossary,
    senseInfo,
    senseMisc,
    sensePOS,
    senseLanguageSource,
    senseRestrictedToKanji,
    senseRestrictedToReading,
    senseSeeAlso,
  };
}

View File

@@ -1,24 +1,5 @@
import 'package:jadb/_data_ingestion/sql_writable.dart';
/// Table-name constants referenced when inserting KANJIDIC rows.
///
/// The class is never instantiated; it only namespaces the constants.
abstract class TableNames {
  // Character-level tables.
  static const character = 'KANJIDIC_Character';
  static const radicalName = 'KANJIDIC_RadicalName';
  static const codepoint = 'KANJIDIC_Codepoint';
  static const radical = 'KANJIDIC_Radical';
  static const strokeMiscount = 'KANJIDIC_StrokeMiscount';
  static const variant = 'KANJIDIC_Variant';

  // Dictionary reference tables (note the leading underscore in the names).
  static const dictionaryReference = '_KANJIDIC_DictionaryReference_Part1';
  static const dictionaryReferenceMoro = '_KANJIDIC_DictionaryReference_Moro';

  // Query code, reading and meaning tables.
  static const queryCode = 'KANJIDIC_QueryCode';
  static const reading = 'KANJIDIC_Reading';
  static const kunyomi = 'KANJIDIC_Kunyomi';
  static const onyomi = 'KANJIDIC_Onyomi';
  static const meaning = 'KANJIDIC_Meaning';
  static const nanori = 'KANJIDIC_Nanori';
}
class CodePoint extends SQLWritable {
final String kanji;
final String type;

View File

@@ -1,3 +1,4 @@
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
import 'package:sqflite_common/sqlite_api.dart';
import 'objects.dart';
@@ -12,19 +13,19 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
// 1)) {
// print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
// }
b.insert(TableNames.character, c.sqlValue);
b.insert(KANJIDICTableNames.character, c.sqlValue);
for (final n in c.radicalName) {
b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n});
b.insert(KANJIDICTableNames.radicalName, {'kanji': c.literal, 'name': n});
}
for (final cp in c.codepoints) {
b.insert(TableNames.codepoint, cp.sqlValue);
b.insert(KANJIDICTableNames.codepoint, cp.sqlValue);
}
for (final r in c.radicals) {
b.insert(TableNames.radical, r.sqlValue);
b.insert(KANJIDICTableNames.radical, r.sqlValue);
}
for (final sm in c.strokeMiscounts) {
b.insert(
TableNames.strokeMiscount,
KANJIDICTableNames.strokeMiscount,
{
'kanji': c.literal,
'strokeCount': sm,
@@ -32,37 +33,37 @@ Future<void> seedKANJIDICData(List<Character> characters, Database db) async {
);
}
for (final v in c.variants) {
b.insert(TableNames.variant, v.sqlValue);
b.insert(KANJIDICTableNames.variant, v.sqlValue);
}
for (final dr in c.dictionaryReferences) {
// There are duplicate entries here
b.insert(
TableNames.dictionaryReference,
KANJIDICTableNames.dictionaryReference,
dr.sqlValue,
conflictAlgorithm: ConflictAlgorithm.ignore,
);
}
for (final drm in c.dictionaryReferencesMoro) {
b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue);
b.insert(KANJIDICTableNames.dictionaryReferenceMoro, drm.sqlValue);
}
for (final q in c.querycodes) {
b.insert(TableNames.queryCode, q.sqlValue);
b.insert(KANJIDICTableNames.queryCode, q.sqlValue);
}
for (final r in c.readings) {
b.insert(TableNames.reading, r.sqlValue);
b.insert(KANJIDICTableNames.reading, r.sqlValue);
}
for (final k in c.kunyomi) {
b.insert(TableNames.kunyomi, k.sqlValue);
b.insert(KANJIDICTableNames.kunyomi, k.sqlValue);
}
for (final o in c.onyomi) {
b.insert(TableNames.onyomi, o.sqlValue);
b.insert(KANJIDICTableNames.onyomi, o.sqlValue);
}
for (final m in c.meanings) {
b.insert(TableNames.meaning, m.sqlValue);
b.insert(KANJIDICTableNames.meaning, m.sqlValue);
}
for (final n in c.nanori) {
b.insert(
TableNames.nanori,
KANJIDICTableNames.nanori,
{
'kanji': c.literal,
'nanori': n,

View File

@@ -0,0 +1,35 @@
/// Names of the database tables that hold KANJIDIC data.
///
/// The class is never instantiated; it only namespaces the constants.
abstract class KANJIDICTableNames {
  static const String character = 'KANJIDIC_Character';
  static const String radicalName = 'KANJIDIC_RadicalName';
  static const String codepoint = 'KANJIDIC_Codepoint';
  static const String radical = 'KANJIDIC_Radical';
  static const String strokeMiscount = 'KANJIDIC_StrokeMiscount';
  static const String variant = 'KANJIDIC_Variant';
  static const String dictionaryReference =
      '_KANJIDIC_DictionaryReference_Part1';
  static const String dictionaryReferenceMoro =
      '_KANJIDIC_DictionaryReference_Moro';
  static const String queryCode = 'KANJIDIC_QueryCode';
  static const String reading = 'KANJIDIC_Reading';
  static const String kunyomi = 'KANJIDIC_Kunyomi';
  static const String onyomi = 'KANJIDIC_Onyomi';
  static const String meaning = 'KANJIDIC_Meaning';
  static const String nanori = 'KANJIDIC_Nanori';

  /// Every table name declared in this class.
  ///
  /// Declared as a compile-time constant so that repeated reads share one
  /// canonical, unmodifiable set instead of allocating a new set on each
  /// access. A duplicated name would also be rejected at compile time.
  static const Set<String> allTables = {
    character,
    radicalName,
    codepoint,
    radical,
    strokeMiscount,
    variant,
    dictionaryReference,
    dictionaryReferenceMoro,
    queryCode,
    reading,
    kunyomi,
    onyomi,
    meaning,
    nanori,
  };
}

View File

@@ -1,21 +1,37 @@
import 'dart:ffi';
import 'dart:io';
import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
import 'package:sqlite3/open.dart';
Future<Database?> openLocalDb({
Future<Database> openLocalDb({
String? libsqlitePath,
String? jadbPath,
bool readWrite = false,
bool assertTablesExist = true,
}) async {
libsqlitePath ??= Platform.environment['LIBSQLITE_PATH'];
jadbPath ??= Platform.environment['JADB_PATH'];
jadbPath ??= Directory.current.uri.resolve('jadb.sqlite').path;
libsqlitePath = (libsqlitePath == null)
? null
: File(libsqlitePath).resolveSymbolicLinksSync();
jadbPath = File(jadbPath).resolveSymbolicLinksSync();
if (libsqlitePath == null) {
libsqlitePath = Platform.environment['LIBSQLITE_PATH'];
throw Exception("LIBSQLITE_PATH is not set");
}
if (jadbPath == null) {
jadbPath = Platform.environment['JADB_PATH'] ??
Directory.current.uri.resolve('jadb.sqlite').path;
if (!File(libsqlitePath).existsSync()) {
throw Exception("LIBSQLITE_PATH does not exist: $libsqlitePath");
}
if (!File(jadbPath).existsSync()) {
throw Exception("JADB_PATH does not exist: $jadbPath");
}
final db = await createDatabaseFactoryFfi(
@@ -31,5 +47,42 @@ Future<Database?> openLocalDb({
),
);
if (assertTablesExist) {
await _assertTablesExist(db);
}
return db;
}
/// Verifies that [db] contains every table the ingestion code expects.
///
/// Reads the names of all rows of type `table` from `sqlite_master` and
/// compares them with the union of [JMdictTableNames.allTables],
/// [KANJIDICTableNames.allTables] and [RADKFILETableNames.allTables].
///
/// Throws an [Exception] whose message lists both the missing and the found
/// tables when at least one expected table is absent.
Future<void> _assertTablesExist(Database db) async {
  final rows = await db.query(
    'sqlite_master',
    columns: ['name'],
    where: 'type = ?',
    whereArgs: ['table'],
  );
  final tables = {for (final row in rows) row['name'] as String};

  final expectedTables = {
    ...JMdictTableNames.allTables,
    ...KANJIDICTableNames.allTables,
    ...RADKFILETableNames.allTables,
  };

  final missingTables = expectedTables.difference(tables);
  if (missingTables.isEmpty) return;

  // Both sections use the same layout: a heading line directly followed by
  // one bullet per table, then a blank separator line.
  throw Exception([
    'Missing tables:',
    for (final table in missingTables) ' - $table',
    '',
    'Found tables:',
    for (final table in tables) ' - $table',
    '',
    'Please ensure the database is correctly set up.',
  ].join('\n'));
}

View File

@@ -1,3 +1,4 @@
import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
import 'package:sqflite_common/sqlite_api.dart';
Future<void> seedRADKFILEData(
@@ -15,7 +16,7 @@ Future<void> seedRADKFILEData(
for (final kanji in kanjiList.toSet()) {
b.insert(
'RADKFILE',
RADKFILETableNames.radkfile,
{
'radical': radical,
'kanji': kanji,

View File

@@ -0,0 +1,7 @@
/// Names of the database tables that hold RADKFILE data.
///
/// The class is never instantiated; it only namespaces the constants.
abstract class RADKFILETableNames {
  static const String radkfile = 'RADKFILE';

  /// Every table name declared in this class.
  ///
  /// Declared as a compile-time constant so that repeated reads share one
  /// canonical, unmodifiable set instead of allocating a new set on each
  /// access.
  static const Set<String> allTables = {
    radkfile,
  };
}

View File

@@ -25,11 +25,6 @@ class CreateDb extends Command {
readWrite: true,
);
if (db == null) {
print("Failed to open database");
exit(1);
}
seedData(db).then((_) {
print("Database created successfully");
}).catchError((error) {

View File

@@ -29,11 +29,6 @@ class QueryKanji extends Command {
libsqlitePath: argResults!.option('libsqlite')!,
);
if (db == null) {
print("Failed to open database");
exit(1);
}
final result = await JaDBConnection(db).searchKanji('');
if (result == null) {

View File

@@ -29,11 +29,6 @@ class QueryWord extends Command {
libsqlitePath: argResults!.option('libsqlite')!,
);
if (db == null) {
print("Failed to open database");
exit(1);
}
final result = await JaDBConnection(db).searchWord('kana');
if (result == null) {