Files
jadb/lib/_data_ingestion/seed_database.dart

83 lines
2.8 KiB
Dart

import 'dart:io';
import 'package:jadb/_data_ingestion/jmdict/seed_data.dart';
import 'package:jadb/_data_ingestion/jmdict/xml_parser.dart';
import 'package:jadb/_data_ingestion/kanjidic/seed_data.dart';
import 'package:jadb/_data_ingestion/kanjidic/xml_parser.dart';
import 'package:jadb/_data_ingestion/radkfile/parser.dart';
import 'package:jadb/_data_ingestion/radkfile/seed_data.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/resolve.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/seed_data.dart';
import 'package:sqflite_common/sqlite_api.dart';
import 'package:xml/xml.dart';
/// Populates [db] from every bundled data source, then compacts it.
///
/// The sources are processed strictly one after another in this order:
/// JMdict, RADKFILE, KANJIDIC, Tanos JLPT. Once all of them have finished,
/// a `VACUUM` statement is executed on [db].
// NOTE(review): the Tanos JLPT step receives the database for resolving
// words, so it presumably relies on the earlier steps — confirm before
// reordering.
Future<void> seedData(Database db) async {
  final steps = <Future<void> Function(Database)>[
    parseAndSeedDataFromJMdict,
    parseAndSeedDataFromRADKFILE,
    parseAndSeedDataFromKANJIDIC,
    parseAndSeedDataFromTanosJLPT,
  ];

  // Deliberately sequential: each step is awaited before the next starts.
  for (final step in steps) {
    await step(db);
  }

  print('Performing VACUUM');
  await db.execute('VACUUM');
}
/// Reads `data/tmp/JMdict.xml`, parses it and writes the result to [db].
///
/// Progress messages are written to stdout with `print`.
///
/// Throws a [FormatException] if the parsed document has no top-level
/// `JMdict` element. Errors from reading the file, parsing the XML or
/// writing to the database propagate unchanged.
Future<void> parseAndSeedDataFromJMdict(Database db) async {
  const path = 'data/tmp/JMdict.xml';

  print('[JMdict] Reading file content...');
  final String rawXML = File(path).readAsStringSync();

  print('[JMdict] Parsing XML tags...');
  final XmlElement? root = XmlDocument.parse(rawXML).getElement('JMdict');
  if (root == null) {
    // Fail with an actionable message instead of a bare null-check error.
    throw FormatException('Expected a <JMdict> root element in $path');
  }

  print('[JMdict] Parsing XML content...');
  final entries = parseJMDictData(root);

  print('[JMdict] Writing to database...');
  await seedJMDictData(entries, db);
}
/// Reads `data/tmp/kanjidic2.xml`, parses it and writes the result to [db].
///
/// Progress messages are written to stdout with `print`.
///
/// Throws a [FormatException] if the parsed document has no top-level
/// `kanjidic2` element. Errors from reading the file, parsing the XML or
/// writing to the database propagate unchanged.
Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
  const path = 'data/tmp/kanjidic2.xml';

  print('[KANJIDIC2] Reading file...');
  final String rawXML = File(path).readAsStringSync();

  print('[KANJIDIC2] Parsing XML...');
  final XmlElement? root = XmlDocument.parse(rawXML).getElement('kanjidic2');
  if (root == null) {
    // Fail with an actionable message instead of a bare null-check error.
    throw FormatException('Expected a <kanjidic2> root element in $path');
  }

  print('[KANJIDIC2] Parsing XML content...');
  final entries = parseKANJIDICData(root);

  print('[KANJIDIC2] Writing to database...');
  await seedKANJIDICData(entries, db);
}
/// Parses `data/tmp/RADKFILE` and writes the resulting blocks to [db].
///
/// Progress messages are written to stdout with `print`.
Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
  print('[RADKFILE] Reading file...');
  // Only a handle is created here; the content is consumed by the parser.
  final radkfile = File('data/tmp/RADKFILE');

  print('[RADKFILE] Parsing content...');
  final parsedBlocks = parseRADKFILEBlocks(radkfile);

  print('[RADKFILE] Writing to database...');
  await seedRADKFILEData(parsedBlocks, db);
}
/// Parses the Tanos JLPT word lists, resolves them and writes them to [db].
///
/// One CSV file per level (`N1` to `N5`) is taken from `data/tanos-jlpt/`.
/// The parsed words are resolved via [resolveAllEntries], which is given
/// [db], and the resolved entries are then stored.
///
/// Progress messages are written to stdout with `print`.
Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
  const levels = ['N1', 'N2', 'N3', 'N4', 'N5'];

  print('[TANOS-JLPT] Reading files...');
  // Key: level label; value: handle to the lower-cased `<level>.csv` file.
  final Map<String, File> csvByLevel = {
    for (final level in levels)
      level: File('data/tanos-jlpt/${level.toLowerCase()}.csv'),
  };

  print('[TANOS-JLPT] Parsing content...');
  final words = await parseJLPTRankedWords(csvByLevel);

  print('[TANOS-JLPT] Resolving words...');
  final resolved = await resolveAllEntries(words, db);

  print('[TANOS-JLPT] Writing to database...');
  await seedTanosJLPTData(resolved, db);
}