_data_ingestion: retrieve input paths from env
Build and test / build (push) Successful in 5m44s

This commit is contained in:
2026-06-08 14:07:39 +09:00
parent ab878b3469
commit f0e919c397
3 changed files with 45 additions and 19 deletions
+30 -8
View File
@@ -23,8 +23,13 @@ Future<void> seedData(Database db) async {
}
Future<void> parseAndSeedDataFromJMdict(Database db) async {
final jmdictPath = Platform.environment['JMDICT_PATH'] ?? 'data/JMdict.xml';
if (!File(jmdictPath).existsSync()) {
throw Exception('JMdict file not found at $jmdictPath');
}
print('[JMdict] Reading file content...');
final String rawXML = File('data/JMdict.xml').readAsStringSync();
final String rawXML = File(jmdictPath).readAsStringSync();
print('[JMdict] Parsing XML tags...');
final XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!;
@@ -37,8 +42,14 @@ Future<void> parseAndSeedDataFromJMdict(Database db) async {
}
Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
final kanjidicPath =
Platform.environment['KANJIDIC_PATH'] ?? 'data/kanjidic2.xml';
if (!File(kanjidicPath).existsSync()) {
throw Exception('KANJIDIC file not found at $kanjidicPath');
}
print('[KANJIDIC2] Reading file...');
final String rawXML = File('data/kanjidic2.xml').readAsStringSync();
final String rawXML = File(kanjidicPath).readAsStringSync();
print('[KANJIDIC2] Parsing XML...');
final XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;
@@ -51,8 +62,13 @@ Future<void> parseAndSeedDataFromKANJIDIC(Database db) async {
}
Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
final radkfilePath = Platform.environment['RADKFILE_PATH'] ?? 'data/RADKFILE';
if (!File(radkfilePath).existsSync()) {
throw Exception('RADKFILE not found at $radkfilePath');
}
print('[RADKFILE] Reading file...');
final File raw = File('data/RADKFILE');
final File raw = File(radkfilePath);
print('[RADKFILE] Parsing content...');
final blocks = parseRADKFILEBlocks(raw);
@@ -62,13 +78,19 @@ Future<void> parseAndSeedDataFromRADKFILE(Database db) async {
}
Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
final tanosJlptPath =
Platform.environment['TANOS_JLPT_PATH'] ?? 'data/tanos-jlpt';
if (!Directory(tanosJlptPath).existsSync()) {
throw Exception('TANOS-JLPT directory not found at $tanosJlptPath');
}
print('[TANOS-JLPT] Reading files...');
final Map<String, File> files = {
'N1': File('data/tanos-jlpt/n1.csv'),
'N2': File('data/tanos-jlpt/n2.csv'),
'N3': File('data/tanos-jlpt/n3.csv'),
'N4': File('data/tanos-jlpt/n4.csv'),
'N5': File('data/tanos-jlpt/n5.csv'),
'N1': File('$tanosJlptPath/n1.csv'),
'N2': File('$tanosJlptPath/n2.csv'),
'N3': File('$tanosJlptPath/n3.csv'),
'N4': File('$tanosJlptPath/n4.csv'),
'N5': File('$tanosJlptPath/n5.csv'),
};
print('[TANOS-JLPT] Parsing content...');
@@ -42,12 +42,18 @@ class CreateTanosJlptMappings extends Command {
final useOverrides = argResults!.flag('overrides');
final tanosJlptPath =
Platform.environment['TANOS_JLPT_PATH'] ?? 'data/tanos-jlpt';
if (!Directory(tanosJlptPath).existsSync()) {
throw Exception('TANOS-JLPT directory not found at $tanosJlptPath');
}
final Map<String, File> files = {
'N1': File('data/tanos-jlpt/n1.csv'),
'N2': File('data/tanos-jlpt/n2.csv'),
'N3': File('data/tanos-jlpt/n3.csv'),
'N4': File('data/tanos-jlpt/n4.csv'),
'N5': File('data/tanos-jlpt/n5.csv'),
'N1': File('$tanosJlptPath/n1.csv'),
'N2': File('$tanosJlptPath/n2.csv'),
'N3': File('$tanosJlptPath/n3.csv'),
'N4': File('$tanosJlptPath/n4.csv'),
'N5': File('$tanosJlptPath/n5.csv'),
};
final rankedWords = await parseJLPTRankedWords(files);
+4 -6
View File
@@ -20,18 +20,22 @@ stdenvNoCC.mkDerivation {
];
env = {
JMDICT_PATH = "${jmdict}/JMdict.xml";
JMDICT_VERSION = jmdict.version;
JMDICT_DATE = jmdict.date;
JMDICT_HASH = jmdict.hash;
KANJIDIC_PATH = "${kanjidic2}/kanjidic2.xml";
KANJIDIC_VERSION = kanjidic2.version;
KANJIDIC_DATE = kanjidic2.date;
KANJIDIC_HASH = kanjidic2.hash;
RADKFILE_PATH = "${radkfile}/RADKFILE";
RADKFILE_VERSION = radkfile.version;
RADKFILE_DATE = radkfile.date;
RADKFILE_HASH = radkfile.hash;
TANOS_JLPT_PATH = toString tanos-jlpt;
TANOS_JLPT_VERSION = tanos-jlpt.version;
TANOS_JLPT_DATE = tanos-jlpt.date;
TANOS_JLPT_HASH = tanos-jlpt.hash;
@@ -40,12 +44,6 @@ stdenvNoCC.mkDerivation {
buildPhase = ''
runHook preBuild
mkdir -p data
ln -s '${jmdict}'/* data/
ln -s '${kanjidic2}'/* data/
ln -s '${radkfile}'/* data/
ln -s '${tanos-jlpt}' data/tanos-jlpt
for migration in migrations/*.sql; do
sqlite3 jadb.sqlite < "$migration"
done