From 39f5826eaa782ee2c9e080a37572ef6ccf24ff2b Mon Sep 17 00:00:00 2001 From: h7x4 Date: Wed, 30 Apr 2025 09:57:43 +0200 Subject: [PATCH] lib/_data_ingestion: improve openLocalDB --- lib/_data_ingestion/jmdict/objects.dart | 24 ------- lib/_data_ingestion/jmdict/seed_data.dart | 45 ++++++------- lib/_data_ingestion/jmdict/table_names.dart | 46 ++++++++++++++ lib/_data_ingestion/kanjidic/objects.dart | 19 ------ lib/_data_ingestion/kanjidic/seed_data.dart | 29 ++++----- lib/_data_ingestion/kanjidic/table_names.dart | 35 +++++++++++ lib/_data_ingestion/open_local_db.dart | 63 +++++++++++++++++-- lib/_data_ingestion/radkfile/seed_data.dart | 3 +- lib/_data_ingestion/radkfile/table_names.dart | 7 +++ lib/cli/commands/create_db.dart | 5 -- lib/cli/commands/query_kanji.dart | 5 -- lib/cli/commands/query_word.dart | 5 -- 12 files changed, 186 insertions(+), 100 deletions(-) create mode 100644 lib/_data_ingestion/jmdict/table_names.dart create mode 100644 lib/_data_ingestion/kanjidic/table_names.dart create mode 100644 lib/_data_ingestion/radkfile/table_names.dart diff --git a/lib/_data_ingestion/jmdict/objects.dart b/lib/_data_ingestion/jmdict/objects.dart index c01f08e..1a1d0c3 100644 --- a/lib/_data_ingestion/jmdict/objects.dart +++ b/lib/_data_ingestion/jmdict/objects.dart @@ -1,29 +1,5 @@ import 'package:jadb/_data_ingestion/sql_writable.dart'; -abstract class TableNames { - static const String entry = 'JMdict_Entry'; - static const String entryByKana = 'JMdict_EntryByKana'; - static const String entryByEnglish = 'JMdict_EntryByEnglish'; - static const String kanjiElement = 'JMdict_KanjiElement'; - static const String kanjiInfo = 'JMdict_KanjiElementInfo'; - static const String readingElement = 'JMdict_ReadingElement'; - static const String readingInfo = 'JMdict_ReadingElementInfo'; - static const String readingRestriction = 'JMdict_ReadingElementRestriction'; - static const String sense = 'JMdict_Sense'; - static const String senseAntonyms = 'JMdict_SenseAntonym'; - 
static const String senseDialect = 'JMdict_SenseDialect'; - static const String senseField = 'JMdict_SenseField'; - static const String senseGlossary = 'JMdict_SenseGlossary'; - static const String senseInfo = 'JMdict_SenseInfo'; - static const String senseMisc = 'JMdict_SenseMisc'; - static const String sensePOS = 'JMdict_SensePOS'; - static const String senseLanguageSource = 'JMdict_SenseLanguageSource'; - static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji'; - static const String senseRestrictedToReading = - 'JMdict_SenseRestrictedToReading'; - static const String senseSeeAlso = 'JMdict_SenseSeeAlso'; -} - abstract class Element extends SQLWritable { final String reading; final int? news; diff --git a/lib/_data_ingestion/jmdict/seed_data.dart b/lib/_data_ingestion/jmdict/seed_data.dart index 867f6d2..1fb6e60 100644 --- a/lib/_data_ingestion/jmdict/seed_data.dart +++ b/lib/_data_ingestion/jmdict/seed_data.dart @@ -2,6 +2,7 @@ import 'dart:collection'; import 'package:collection/collection.dart'; import 'package:jadb/_data_ingestion/jmdict/objects.dart'; +import 'package:jadb/_data_ingestion/jmdict/table_names.dart'; import 'package:jadb/util/romaji_transliteration.dart'; import 'package:sqflite_common/sqlite_api.dart'; @@ -72,18 +73,18 @@ Future seedJMDictData(List entries, Database db) async { print(' [JMdict] Batch 1 - Kanji and readings'); Batch b = db.batch(); for (final e in entries) { - b.insert(TableNames.entry, e.sqlValue); + b.insert(JMdictTableNames.entry, e.sqlValue); for (final k in e.kanji) { - b.insert(TableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id})); + b.insert(JMdictTableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id})); // b.insert( - // TableNames.entryByKana, + // JMdictTableNames.entryByKana, // {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)}, // // Some entries have the same reading twice with difference in katakana and hiragana // conflictAlgorithm: 
ConflictAlgorithm.ignore, // ); for (final i in k.info) { b.insert( - TableNames.kanjiInfo, + JMdictTableNames.kanjiInfo, { 'entryId': e.id, 'reading': k.reading, @@ -94,12 +95,12 @@ Future seedJMDictData(List entries, Database db) async { } for (final r in e.readings) { b.insert( - TableNames.readingElement, + JMdictTableNames.readingElement, r.sqlValue..addAll({'entryId': e.id}), ); b.insert( - TableNames.entryByKana, + JMdictTableNames.entryByKana, { 'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading), @@ -109,7 +110,7 @@ Future seedJMDictData(List entries, Database db) async { ); for (final i in r.info) { b.insert( - TableNames.readingInfo, + JMdictTableNames.readingInfo, { 'entryId': e.id, 'reading': r.reading, @@ -119,7 +120,7 @@ Future seedJMDictData(List entries, Database db) async { } for (final res in r.restrictions) { b.insert( - TableNames.readingRestriction, + JMdictTableNames.readingRestriction, { 'entryId': e.id, 'reading': r.reading, @@ -132,7 +133,7 @@ Future seedJMDictData(List entries, Database db) async { for (final s in e.senses) { for (final g in s.glossary) { b.insert( - TableNames.entryByEnglish, + JMdictTableNames.entryByEnglish, { 'entryId': e.id, 'english': g.phrase, @@ -150,49 +151,49 @@ Future seedJMDictData(List entries, Database db) async { b = db.batch(); for (final e in entries) { for (final s in e.senses) { - b.insert(TableNames.sense, s.sqlValue..addAll({'entryId': e.id})); + b.insert(JMdictTableNames.sense, s.sqlValue..addAll({'entryId': e.id})); for (final d in s.dialects) { - b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d}); + b.insert(JMdictTableNames.senseDialect, {'senseId': s.id, 'dialect': d}); } for (final f in s.fields) { - b.insert(TableNames.senseField, {'senseId': s.id, 'field': f}); + b.insert(JMdictTableNames.senseField, {'senseId': s.id, 'field': f}); } for (final i in s.info) { - b.insert(TableNames.senseInfo, {'senseId': s.id, 'info': i}); + b.insert(JMdictTableNames.senseInfo, 
{'senseId': s.id, 'info': i}); } for (final m in s.misc) { - b.insert(TableNames.senseMisc, {'senseId': s.id, 'misc': m}); + b.insert(JMdictTableNames.senseMisc, {'senseId': s.id, 'misc': m}); } for (final p in s.pos) { - b.insert(TableNames.sensePOS, {'senseId': s.id, 'pos': p}); + b.insert(JMdictTableNames.sensePOS, {'senseId': s.id, 'pos': p}); } for (final l in s.languageSource) { b.insert( - TableNames.senseLanguageSource, + JMdictTableNames.senseLanguageSource, l.sqlValue..addAll({'senseId': s.id}), ); } for (final rk in s.restrictedToKanji) { b.insert( - TableNames.senseRestrictedToKanji, + JMdictTableNames.senseRestrictedToKanji, {'entryId': e.id, 'senseId': s.id, 'kanji': rk}, ); } for (final rr in s.restrictedToReading) { b.insert( - TableNames.senseRestrictedToReading, + JMdictTableNames.senseRestrictedToReading, {'entryId': e.id, 'senseId': s.id, 'reading': rr}, ); } for (final ls in s.languageSource) { b.insert( - TableNames.senseLanguageSource, + JMdictTableNames.senseLanguageSource, ls.sqlValue..addAll({'senseId': s.id}), ); } for (final g in s.glossary) { b.insert( - TableNames.senseGlossary, + JMdictTableNames.senseGlossary, g.sqlValue..addAll({'senseId': s.id}), ); } @@ -236,7 +237,7 @@ Future seedJMDictData(List entries, Database db) async { ); b.insert( - TableNames.senseSeeAlso, + JMdictTableNames.senseSeeAlso, { 'senseId': s.id, 'xrefEntryId': resolvedEntry.entry.id, @@ -255,7 +256,7 @@ Future seedJMDictData(List entries, Database db) async { ant, ); - b.insert(TableNames.senseAntonyms, { + b.insert(JMdictTableNames.senseAntonyms, { 'senseId': s.id, 'xrefEntryId': resolvedEntry.entry.id, 'antonymKanji': ant.kanjiRef, diff --git a/lib/_data_ingestion/jmdict/table_names.dart b/lib/_data_ingestion/jmdict/table_names.dart new file mode 100644 index 0000000..8f0c61b --- /dev/null +++ b/lib/_data_ingestion/jmdict/table_names.dart @@ -0,0 +1,46 @@ +abstract class JMdictTableNames { + static const String entry = 'JMdict_Entry'; + static const String 
entryByKana = 'JMdict_EntryByKana'; + static const String entryByEnglish = 'JMdict_EntryByEnglish'; + static const String kanjiElement = 'JMdict_KanjiElement'; + static const String kanjiInfo = 'JMdict_KanjiElementInfo'; + static const String readingElement = 'JMdict_ReadingElement'; + static const String readingInfo = 'JMdict_ReadingElementInfo'; + static const String readingRestriction = 'JMdict_ReadingElementRestriction'; + static const String sense = 'JMdict_Sense'; + static const String senseAntonyms = 'JMdict_SenseAntonym'; + static const String senseDialect = 'JMdict_SenseDialect'; + static const String senseField = 'JMdict_SenseField'; + static const String senseGlossary = 'JMdict_SenseGlossary'; + static const String senseInfo = 'JMdict_SenseInfo'; + static const String senseMisc = 'JMdict_SenseMisc'; + static const String sensePOS = 'JMdict_SensePOS'; + static const String senseLanguageSource = 'JMdict_SenseLanguageSource'; + static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji'; + static const String senseRestrictedToReading = + 'JMdict_SenseRestrictedToReading'; + static const String senseSeeAlso = 'JMdict_SenseSeeAlso'; + + static Set get allTables => { + entry, + entryByKana, + entryByEnglish, + kanjiElement, + kanjiInfo, + readingElement, + readingInfo, + readingRestriction, + sense, + senseAntonyms, + senseDialect, + senseField, + senseGlossary, + senseInfo, + senseMisc, + sensePOS, + senseLanguageSource, + senseRestrictedToKanji, + senseRestrictedToReading, + senseSeeAlso + }; +} diff --git a/lib/_data_ingestion/kanjidic/objects.dart b/lib/_data_ingestion/kanjidic/objects.dart index e3056b3..c759377 100644 --- a/lib/_data_ingestion/kanjidic/objects.dart +++ b/lib/_data_ingestion/kanjidic/objects.dart @@ -1,24 +1,5 @@ import 'package:jadb/_data_ingestion/sql_writable.dart'; -abstract class TableNames { - static const String character = 'KANJIDIC_Character'; - static const String radicalName = 'KANJIDIC_RadicalName'; - static 
const String codepoint = 'KANJIDIC_Codepoint'; - static const String radical = 'KANJIDIC_Radical'; - static const String strokeMiscount = 'KANJIDIC_StrokeMiscount'; - static const String variant = 'KANJIDIC_Variant'; - static const String dictionaryReference = - '_KANJIDIC_DictionaryReference_Part1'; - static const String dictionaryReferenceMoro = - '_KANJIDIC_DictionaryReference_Moro'; - static const String queryCode = 'KANJIDIC_QueryCode'; - static const String reading = 'KANJIDIC_Reading'; - static const String kunyomi = 'KANJIDIC_Kunyomi'; - static const String onyomi = 'KANJIDIC_Onyomi'; - static const String meaning = 'KANJIDIC_Meaning'; - static const String nanori = 'KANJIDIC_Nanori'; -} - class CodePoint extends SQLWritable { final String kanji; final String type; diff --git a/lib/_data_ingestion/kanjidic/seed_data.dart b/lib/_data_ingestion/kanjidic/seed_data.dart index 6f5797a..f949f6d 100644 --- a/lib/_data_ingestion/kanjidic/seed_data.dart +++ b/lib/_data_ingestion/kanjidic/seed_data.dart @@ -1,3 +1,4 @@ +import 'package:jadb/_data_ingestion/kanjidic/table_names.dart'; import 'package:sqflite_common/sqlite_api.dart'; import 'objects.dart'; @@ -12,19 +13,19 @@ Future seedKANJIDICData(List characters, Database db) async { // 1)) { // print(c.dictionaryReferences.map((e) => e.sqlValue).toList()); // } - b.insert(TableNames.character, c.sqlValue); + b.insert(KANJIDICTableNames.character, c.sqlValue); for (final n in c.radicalName) { - b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n}); + b.insert(KANJIDICTableNames.radicalName, {'kanji': c.literal, 'name': n}); } for (final cp in c.codepoints) { - b.insert(TableNames.codepoint, cp.sqlValue); + b.insert(KANJIDICTableNames.codepoint, cp.sqlValue); } for (final r in c.radicals) { - b.insert(TableNames.radical, r.sqlValue); + b.insert(KANJIDICTableNames.radical, r.sqlValue); } for (final sm in c.strokeMiscounts) { b.insert( - TableNames.strokeMiscount, + KANJIDICTableNames.strokeMiscount, { 
'kanji': c.literal, 'strokeCount': sm, @@ -32,37 +33,37 @@ Future seedKANJIDICData(List characters, Database db) async { ); } for (final v in c.variants) { - b.insert(TableNames.variant, v.sqlValue); + b.insert(KANJIDICTableNames.variant, v.sqlValue); } for (final dr in c.dictionaryReferences) { // There are duplicate entries here b.insert( - TableNames.dictionaryReference, + KANJIDICTableNames.dictionaryReference, dr.sqlValue, conflictAlgorithm: ConflictAlgorithm.ignore, ); } for (final drm in c.dictionaryReferencesMoro) { - b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue); + b.insert(KANJIDICTableNames.dictionaryReferenceMoro, drm.sqlValue); } for (final q in c.querycodes) { - b.insert(TableNames.queryCode, q.sqlValue); + b.insert(KANJIDICTableNames.queryCode, q.sqlValue); } for (final r in c.readings) { - b.insert(TableNames.reading, r.sqlValue); + b.insert(KANJIDICTableNames.reading, r.sqlValue); } for (final k in c.kunyomi) { - b.insert(TableNames.kunyomi, k.sqlValue); + b.insert(KANJIDICTableNames.kunyomi, k.sqlValue); } for (final o in c.onyomi) { - b.insert(TableNames.onyomi, o.sqlValue); + b.insert(KANJIDICTableNames.onyomi, o.sqlValue); } for (final m in c.meanings) { - b.insert(TableNames.meaning, m.sqlValue); + b.insert(KANJIDICTableNames.meaning, m.sqlValue); } for (final n in c.nanori) { b.insert( - TableNames.nanori, + KANJIDICTableNames.nanori, { 'kanji': c.literal, 'nanori': n, diff --git a/lib/_data_ingestion/kanjidic/table_names.dart b/lib/_data_ingestion/kanjidic/table_names.dart new file mode 100644 index 0000000..f8b0735 --- /dev/null +++ b/lib/_data_ingestion/kanjidic/table_names.dart @@ -0,0 +1,35 @@ +abstract class KANJIDICTableNames { + static const String character = 'KANJIDIC_Character'; + static const String radicalName = 'KANJIDIC_RadicalName'; + static const String codepoint = 'KANJIDIC_Codepoint'; + static const String radical = 'KANJIDIC_Radical'; + static const String strokeMiscount = 'KANJIDIC_StrokeMiscount'; + static 
const String variant = 'KANJIDIC_Variant';
+  static const String dictionaryReference =
+      '_KANJIDIC_DictionaryReference_Part1';
+  static const String dictionaryReferenceMoro =
+      '_KANJIDIC_DictionaryReference_Moro';
+  static const String queryCode = 'KANJIDIC_QueryCode';
+  static const String reading = 'KANJIDIC_Reading';
+  static const String kunyomi = 'KANJIDIC_Kunyomi';
+  static const String onyomi = 'KANJIDIC_Onyomi';
+  static const String meaning = 'KANJIDIC_Meaning';
+  static const String nanori = 'KANJIDIC_Nanori';
+
+  static Set get allTables => {
+        character,
+        radicalName,
+        codepoint,
+        radical,
+        strokeMiscount,
+        variant,
+        dictionaryReference,
+        dictionaryReferenceMoro,
+        queryCode,
+        reading,
+        kunyomi,
+        onyomi,
+        meaning,
+        nanori
+      };
+}
diff --git a/lib/_data_ingestion/open_local_db.dart b/lib/_data_ingestion/open_local_db.dart
index 03ab19b..e2595e1 100644
--- a/lib/_data_ingestion/open_local_db.dart
+++ b/lib/_data_ingestion/open_local_db.dart
@@ -1,21 +1,46 @@
 import 'dart:ffi';
 import 'dart:io';
 
+import 'package:jadb/_data_ingestion/jmdict/table_names.dart';
+import 'package:jadb/_data_ingestion/kanjidic/table_names.dart';
+import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
 import 'package:sqflite_common_ffi/sqflite_ffi.dart';
 import 'package:sqlite3/open.dart';
 
-Future openLocalDb({
+/// Opens the jadb SQLite database through the sqflite FFI database factory.
+///
+/// [libsqlitePath] falls back to the `LIBSQLITE_PATH` environment variable.
+/// [jadbPath] falls back to `JADB_PATH`, then to `jadb.sqlite` in the current
+/// working directory. When [assertTablesExist] is true, the opened database
+/// is checked for every expected table before it is returned.
+///
+/// Throws an [Exception] if no libsqlite path is available or if either path
+/// does not point to an existing file.
+Future<Database> openLocalDb({
   String? libsqlitePath,
   String? jadbPath,
   bool readWrite = false,
+  bool assertTablesExist = true,
 }) async {
+  libsqlitePath ??= Platform.environment['LIBSQLITE_PATH'];
+  jadbPath ??= Platform.environment['JADB_PATH'];
+  jadbPath ??= Directory.current.uri.resolve('jadb.sqlite').path;
+
   if (libsqlitePath == null) {
-    libsqlitePath = Platform.environment['LIBSQLITE_PATH'];
+    throw Exception("LIBSQLITE_PATH is not set");
   }
 
-  if (jadbPath == null) {
-    jadbPath = Platform.environment['JADB_PATH'] ??
-        Directory.current.uri.resolve('jadb.sqlite').path;
+  if (!File(libsqlitePath).existsSync()) {
+    throw Exception("LIBSQLITE_PATH does not exist: $libsqlitePath");
+  }
+
+  if (!File(jadbPath).existsSync()) {
+    throw Exception("JADB_PATH does not exist: $jadbPath");
   }
 
+  // Resolve symlinks only after the existence checks above, because
+  // resolveSymbolicLinksSync throws a FileSystemException for a missing path.
+  libsqlitePath = File(libsqlitePath).resolveSymbolicLinksSync();
+  jadbPath = File(jadbPath).resolveSymbolicLinksSync();
+
   final db = await createDatabaseFactoryFfi(
@@ -31,5 +56,44 @@ Future openLocalDb({
     ),
   );
 
+  if (assertTablesExist) {
+    await _assertTablesExist(db);
+  }
+
   return db;
 }
+
+/// Verifies that [db] contains every table listed by the table-name classes.
+///
+/// Throws an [Exception] naming the missing tables, together with the tables
+/// that were found, if any expected table is absent.
+Future<void> _assertTablesExist(Database db) async {
+  final rows = await db.query(
+    'sqlite_master',
+    columns: ['name'],
+    where: 'type = ?',
+    whereArgs: ['table'],
+  );
+  final tables = {for (final row in rows) row['name'] as String};
+
+  final expectedTables = <String>{
+    ...JMdictTableNames.allTables,
+    ...KANJIDICTableNames.allTables,
+    ...RADKFILETableNames.allTables,
+  };
+
+  final missingTables = expectedTables.difference(tables);
+
+  if (missingTables.isNotEmpty) {
+    throw Exception([
+      'Missing tables:',
+      missingTables.map((table) => ' - $table').join('\n'),
+      '',
+      // No trailing '\n' here: join('\n') below already separates the lines.
+      'Found tables:',
+      tables.map((table) => ' - $table').join('\n'),
+      '',
+      'Please ensure the database is correctly set up.',
+    ].join('\n'));
+  }
+}
diff --git a/lib/_data_ingestion/radkfile/seed_data.dart b/lib/_data_ingestion/radkfile/seed_data.dart
index 35596d8..88dcc28 100644
--- a/lib/_data_ingestion/radkfile/seed_data.dart
+++ b/lib/_data_ingestion/radkfile/seed_data.dart
@@ -1,3 +1,4 @@
+import 'package:jadb/_data_ingestion/radkfile/table_names.dart';
 import 'package:sqflite_common/sqlite_api.dart';
 
 Future seedRADKFILEData(
@@ -15,7 +16,7 @@ Future seedRADKFILEData(
 
     for (final kanji in kanjiList.toSet()) {
b.insert( - 'RADKFILE', + RADKFILETableNames.radkfile, { 'radical': radical, 'kanji': kanji, diff --git a/lib/_data_ingestion/radkfile/table_names.dart b/lib/_data_ingestion/radkfile/table_names.dart new file mode 100644 index 0000000..925bad5 --- /dev/null +++ b/lib/_data_ingestion/radkfile/table_names.dart @@ -0,0 +1,7 @@ +abstract class RADKFILETableNames { + static const String radkfile = 'RADKFILE'; + + static Set get allTables => { + radkfile, + }; +} diff --git a/lib/cli/commands/create_db.dart b/lib/cli/commands/create_db.dart index e77d987..38fd688 100644 --- a/lib/cli/commands/create_db.dart +++ b/lib/cli/commands/create_db.dart @@ -25,11 +25,6 @@ class CreateDb extends Command { readWrite: true, ); - if (db == null) { - print("Failed to open database"); - exit(1); - } - seedData(db).then((_) { print("Database created successfully"); }).catchError((error) { diff --git a/lib/cli/commands/query_kanji.dart b/lib/cli/commands/query_kanji.dart index a298de5..c5be8bd 100644 --- a/lib/cli/commands/query_kanji.dart +++ b/lib/cli/commands/query_kanji.dart @@ -29,11 +29,6 @@ class QueryKanji extends Command { libsqlitePath: argResults!.option('libsqlite')!, ); - if (db == null) { - print("Failed to open database"); - exit(1); - } - final result = await JaDBConnection(db).searchKanji('漢'); if (result == null) { diff --git a/lib/cli/commands/query_word.dart b/lib/cli/commands/query_word.dart index 28f3f14..40db894 100644 --- a/lib/cli/commands/query_word.dart +++ b/lib/cli/commands/query_word.dart @@ -29,11 +29,6 @@ class QueryWord extends Command { libsqlitePath: argResults!.option('libsqlite')!, ); - if (db == null) { - print("Failed to open database"); - exit(1); - } - final result = await JaDBConnection(db).searchWord('kana'); if (result == null) {