105 lines
3.6 KiB
Dart
105 lines
3.6 KiB
Dart
import 'dart:io';
|
|
|
|
import 'package:jadb/_data_ingestion/jmdict/seed_data.dart';
|
|
import 'package:jadb/_data_ingestion/jmdict/xml_parser.dart';
|
|
import 'package:jadb/_data_ingestion/kanjidic/seed_data.dart';
|
|
import 'package:jadb/_data_ingestion/kanjidic/xml_parser.dart';
|
|
import 'package:jadb/_data_ingestion/radkfile/parser.dart';
|
|
import 'package:jadb/_data_ingestion/radkfile/seed_data.dart';
|
|
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
|
|
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
|
|
import 'package:jadb/_data_ingestion/tanos-jlpt/seed_data.dart';
|
|
import 'package:sqflite_common/sqlite_api.dart';
|
|
import 'package:xml/xml.dart';
|
|
|
|
/// Populates [db] by running every ingestion step in sequence and then
/// compacting the database with `VACUUM`.
///
/// The steps run strictly one after another in this order: JMdict, RADKFILE,
/// KANJIDIC, Tanos JLPT.
// NOTE(review): the order presumably matters (the JLPT step receives the
// database when resolving words) — confirm before reordering.
Future<void> seedData(Database db) async {
  final ingestionSteps = <Future<void> Function(Database)>[
    parseAndSeedDataFromJMdict,
    parseAndSeedDataFromRADKFILE,
    parseAndSeedDataFromKANJIDIC,
    parseAndSeedDataFromTanosJLPT,
  ];

  // Each step is awaited before the next one starts.
  for (final runStep in ingestionSteps) {
    await runStep(db);
  }

  print('Performing VACUUM');
  await db.execute('VACUUM');
}
|
|
|
|
/// Reads the JMdict XML file, parses it, and writes the result to [db].
///
/// The file location is taken from the `JMDICT_PATH` environment variable and
/// falls back to `data/JMdict.xml` when the variable is not set.
///
/// Throws an [Exception] if the file does not exist, or if the parsed
/// document has no `JMdict` root element.
Future<void> parseAndSeedDataFromJMdict(Database db) async {
  final jmdictPath = Platform.environment['JMDICT_PATH'] ?? 'data/JMdict.xml';
  final jmdictFile = File(jmdictPath);
  if (!jmdictFile.existsSync()) {
    throw Exception('JMdict file not found at $jmdictPath');
  }

  print('[JMdict] Reading file content...');
  final rawXml = jmdictFile.readAsStringSync();

  print('[JMdict] Parsing XML tags...');
  final root = XmlDocument.parse(rawXml).getElement('JMdict');
  if (root == null) {
    // Fail with a message that names the file instead of the opaque
    // "Null check operator used on a null value" a `!` would produce.
    throw Exception('JMdict file at $jmdictPath has no <JMdict> root element');
  }

  print('[JMdict] Parsing XML content...');
  final entries = parseJMDictData(root);

  print('[JMdict] Writing to database...');
  await seedJMDictData(entries, db);
}
|
|
|
|
/// Reads the KANJIDIC2 XML file, parses it, and writes the result to [db].
///
/// The file location is taken from the `KANJIDIC_PATH` environment variable
/// and falls back to `data/kanjidic2.xml` when the variable is not set.
///
/// Throws an [Exception] if the file does not exist, or if the parsed
/// document has no `kanjidic2` root element.
Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
  final kanjidicPath =
      Platform.environment['KANJIDIC_PATH'] ?? 'data/kanjidic2.xml';
  final kanjidicFile = File(kanjidicPath);
  if (!kanjidicFile.existsSync()) {
    throw Exception('KANJIDIC file not found at $kanjidicPath');
  }

  print('[KANJIDIC2] Reading file...');
  final rawXml = kanjidicFile.readAsStringSync();

  print('[KANJIDIC2] Parsing XML...');
  final root = XmlDocument.parse(rawXml).getElement('kanjidic2');
  if (root == null) {
    // Fail with a message that names the file instead of the opaque
    // "Null check operator used on a null value" a `!` would produce.
    throw Exception(
      'KANJIDIC file at $kanjidicPath has no <kanjidic2> root element',
    );
  }

  print('[KANJIDIC2] Parsing XML content...');
  final entries = parseKANJIDICData(root);

  print('[KANJIDIC2] Writing to database...');
  await seedKANJIDICData(entries, db);
}
|
|
|
|
/// Parses the RADKFILE and writes its blocks to [db].
///
/// The file location comes from the `RADKFILE_PATH` environment variable,
/// defaulting to `data/RADKFILE`. Throws an [Exception] when no file exists
/// at that location.
Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
  final sourcePath = Platform.environment['RADKFILE_PATH'] ?? 'data/RADKFILE';
  final source = File(sourcePath);

  if (!source.existsSync()) {
    throw Exception('RADKFILE not found at $sourcePath');
  }

  // The file handle itself is passed to the parser; nothing is read here.
  print('[RADKFILE] Reading file...');

  print('[RADKFILE] Parsing content...');
  final radicalBlocks = parseRADKFILEBlocks(source);

  print('[RADKFILE] Writing to database...');
  await seedRADKFILEData(radicalBlocks, db);
}
|
|
|
|
/// Parses the Tanos JLPT word lists, resolves them, and writes them to [db].
///
/// The directory holding the lists comes from the `TANOS_JLPT_PATH`
/// environment variable, defaulting to `data/tanos-jlpt`. One CSV file per
/// level is expected inside it: `n1.csv` through `n5.csv`.
///
/// Throws an [Exception] when the directory does not exist. The individual
/// CSV files are not checked here.
Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
  final baseDir = Platform.environment['TANOS_JLPT_PATH'] ?? 'data/tanos-jlpt';

  if (!Directory(baseDir).existsSync()) {
    throw Exception('TANOS-JLPT directory not found at $baseDir');
  }

  print('[TANOS-JLPT] Reading files...');
  // Keys 'N1'..'N5' map to '<dir>/n1.csv'..'<dir>/n5.csv', in that order.
  final csvByLevel = <String, File>{
    for (var level = 1; level <= 5; level++)
      'N$level': File('$baseDir/n$level.csv'),
  };

  print('[TANOS-JLPT] Parsing content...');
  final ranked = await parseJLPTRankedWords(csvByLevel);

  print('[TANOS-JLPT] Resolving words...');
  final resolved = await resolveAllEntries(ranked, db);

  print('[TANOS-JLPT] Writing to database...');
  await seedTanosJLPTData(resolved, db);
}
|