WIP: add kanjivg data
Build and test / build (push) Successful in 7m20s

This commit is contained in:
2026-03-03 13:47:59 +09:00
parent f0e919c397
commit 81954056b2
10 changed files with 217 additions and 0 deletions
Generated
+17
View File
@@ -20,6 +20,22 @@
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
}
},
"kanjivg-src": {
"flake": false,
"locked": {
"lastModified": 1775218066,
"narHash": "sha256-iYv9xakgoGt/JwwdKDUCpSAF36hBtKlX9oN7xiLowjs=",
"ref": "refs/heads/master",
"rev": "544d319f79348c092d567b662f27f33dacfa60cd",
"revCount": 2215,
"type": "git",
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
},
"original": {
"type": "git",
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1779560665,
@@ -38,6 +54,7 @@
"root": {
"inputs": {
"datasources": "datasources",
"kanjivg-src": "kanjivg-src",
"nixpkgs": "nixpkgs"
}
}
+7
View File
@@ -8,12 +8,18 @@
url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git";
inputs.nixpkgs.follows = "nixpkgs";
};
# KanjiVG repository. Declared with `flake = false`, so it is consumed as a
# plain source tree rather than evaluated as a flake.
kanjivg-src = {
url = "git+https://git.pvv.ntnu.no/mugiten/kanjivg.git";
flake = false;
};
};
outputs = {
self,
nixpkgs,
datasources,
kanjivg-src,
}: let
inherit (nixpkgs) lib;
systems = [
@@ -124,6 +130,7 @@
database = pkgs.callPackage ./nix/database.nix {
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
kanjivg = kanjivg-src;
inherit (self.packages.${system}) database-tool;
inherit src;
};
+92
View File
@@ -0,0 +1,92 @@
import 'package:jadb/_data_ingestion/sql_writable.dart';
/// Values of the `kvg:position` attribute carried by `<g>` elements in the
/// KanjiVG SVG files.
///
/// The declaration order is alphabetical; the `name` of each value is the
/// string that gets written to the database.
enum KanjiPathGroupPosition {
  bottom,
  kamae,
  kamaec,
  left,
  middle,
  nyo,
  nyoc,
  right,
  tare,
  tarec,
  top,
}
/// Contents of a `<g>` element in the KanjiVG SVG files.
///
/// Groups nest, so every node keeps the groups found directly inside it in
/// [children]. The remaining fields are the optional attributes of the
/// element.
class KanjiPathGroupTreeNode extends SQLWritable {
  /// Identifier of this group.
  final String id;

  /// Groups nested directly inside this one. Not included in [sqlValue].
  final List<KanjiPathGroupTreeNode> children;

  // NOTE(review): the fields below presumably mirror the `kvg:element`,
  // `kvg:original`, `kvg:position`, `kvg:radical` and `kvg:part` attributes;
  // confirm once the parser is written.
  final String? element;
  final String? original;
  final KanjiPathGroupPosition? position;
  final String? radical;
  final int? part;

  KanjiPathGroupTreeNode({
    required this.id,
    this.children = const [],
    this.element,
    this.original,
    this.position,
    this.radical,
    this.part,
  });

  /// Column values describing this group alone.
  ///
  /// The keys are the column names of the `KanjiVG_PathGroup` table, so [id]
  /// is written as `groupId`. The owning character and the parent group are
  /// not known to the node and have to be supplied by the caller.
  @override
  Map<String, Object?> get sqlValue => {
    'groupId': id,
    'element': element,
    'original': original,
    'position': position?.name,
    'radical': radical,
    'part': part,
  };
}
/// Contents of a `<text>` element in the StrokeNumber's group in the KanjiVG
/// SVG files.
class KanjiStrokeNumber extends SQLWritable {
  /// The stroke number shown by the label.
  ///
  /// Note that inside this class the field name shadows the `num` type from
  /// `dart:core`.
  final int num;

  // NOTE(review): presumably the position of the label within the SVG;
  // confirm once the parser is written.
  final double x;
  final double y;

  KanjiStrokeNumber(this.num, this.x, this.y);

  /// Column values for one stroke number.
  ///
  /// The keys are the column names of the `KanjiVG_StrokeNumber` table, so
  /// [num] is written as `strokeNum`. The owning character has to be supplied
  /// by the caller.
  @override
  Map<String, Object?> get sqlValue => {'strokeNum': num, 'x': x, 'y': y};
}
/// Contents of a `<path>` element in the KanjiVG SVG files.
class KanjiVGPath extends SQLWritable {
  /// Identifier of the path.
  final String id;

  // NOTE(review): presumably the `kvg:type` attribute; confirm once the
  // parser is written.
  final String type;

  /// The path data to store in the `svgPath` column.
  final String svgPath;

  KanjiVGPath({required this.id, required this.type, required this.svgPath});

  /// Column values for one path.
  ///
  /// The keys are the column names of the `KanjiVG_Path` table, so [id] is
  /// written as `pathId`. The owning character has to be supplied by the
  /// caller.
  @override
  Map<String, Object?> get sqlValue => {
    'pathId': id,
    'type': type,
    'svgPath': svgPath,
  };
}
/// Everything collected for a single character: its paths, its stroke
/// numbers and its path groups.
class KanjiVGItem extends SQLWritable {
  /// The character this item describes.
  final String character;

  final List<KanjiVGPath> paths;
  final List<KanjiStrokeNumber> strokeNumbers;
  final List<KanjiPathGroupTreeNode> pathGroups;

  KanjiVGItem({
    required this.character,
    required this.paths,
    required this.strokeNumbers,
    required this.pathGroups,
  });

  /// Only the character itself; the three lists are not part of this map.
  @override
  Map<String, Object?> get sqlValue {
    return {'character': character};
  }
}
+17
View File
@@ -0,0 +1,17 @@
import 'dart:io';
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
/// Walks the `.svg` files directly inside [rootDir] and collects one
/// [KanjiVGItem] per file.
///
/// Parsing is not implemented yet: every file is read, but nothing is added
/// to the result, so the returned list is currently always empty.
///
/// Throws a [FileSystemException] if [rootDir] cannot be listed or a file
/// cannot be read.
List<KanjiVGItem> parseKanjiVGData(Directory rootDir) {
  final items = <KanjiVGItem>[];

  final svgFiles = rootDir.listSync().whereType<File>().where(
    (file) => file.path.endsWith('.svg'),
  );

  for (final file in svgFiles) {
    // Log before reading so a failing file can be identified from the output.
    print('Parsing ${file.path}...');

    // The content is read but deliberately not echoed: dumping every SVG
    // would flood the build log. Reading still surfaces unreadable files.
    file.readAsStringSync();

    // TODO: build a KanjiVGItem from the SVG content and add it to `items`.
  }

  return items;
}
@@ -0,0 +1,8 @@
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
import 'package:sqflite_common/sqflite.dart';
/// Writes [items] to [db].
///
/// Currently a placeholder: [items] is not read, and the batch that gets
/// committed contains no operations.
Future<void> seedKanjiVGData(Iterable<KanjiVGItem> items, Database db) async {
  await db.batch().commit(noResult: true);
}
+17
View File
@@ -4,6 +4,8 @@ import 'package:jadb/_data_ingestion/jmdict/seed_data.dart';
import 'package:jadb/_data_ingestion/jmdict/xml_parser.dart';
import 'package:jadb/_data_ingestion/kanjidic/seed_data.dart';
import 'package:jadb/_data_ingestion/kanjidic/xml_parser.dart';
import 'package:jadb/_data_ingestion/kanjivg/parser.dart';
import 'package:jadb/_data_ingestion/kanjivg/seed_data.dart';
import 'package:jadb/_data_ingestion/radkfile/parser.dart';
import 'package:jadb/_data_ingestion/radkfile/seed_data.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
@@ -17,6 +19,7 @@ Future<void> seedData(Database db) async {
await parseAndSeedDataFromRADKFILE(db);
await parseAndSeedDataFromKANJIDIC(db);
await parseAndSeedDataFromTanosJLPT(db);
await parseAndSeedDataFromKanjiVG(db);
print('Performing VACUUM');
await db.execute('VACUUM');
@@ -102,3 +105,17 @@ Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
print('[TANOS-JLPT] Writing to database...');
await seedTanosJLPTData(resolvedEntries, db);
}
/// Parses the KanjiVG directory and hands the result to [seedKanjiVGData].
///
/// The directory is taken from the `KANJIVG_PATH` environment variable and
/// defaults to `data/kanjivg`. Throws an [Exception] if it does not exist.
Future<void> parseAndSeedDataFromKanjiVG(Database db) async {
  final kanjivgPath = Platform.environment['KANJIVG_PATH'] ?? 'data/kanjivg';
  final kanjivgDir = Directory(kanjivgPath);

  if (!kanjivgDir.existsSync()) {
    throw Exception('KANJIVG directory not found at $kanjivgPath');
  }

  print('[KANJIVG] Parsing content...');
  final parsedItems = parseKanjiVGData(kanjivgDir);

  print('[KANJIVG] Writing to database...');
  await seedKanjiVGData(parsedItems, db);
}
+2
View File
@@ -1,5 +1,6 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/kanjivg.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -21,6 +22,7 @@ Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
...KANJIDICTableNames.allTables,
...RADKFILETableNames.allTables,
...TanosJLPTTableNames.allTables,
...KanjiVGTableNames.allTables,
};
final missingTables = expectedTables.difference(tables);
+9
View File
@@ -0,0 +1,9 @@
/// Names of the database tables that hold KanjiVG data.
abstract class KanjiVGTableNames {
  static const String version = 'KanjiVG_Version';
  static const String entry = 'KanjiVG_Entry';
  static const String path = 'KanjiVG_Path';
  static const String strokeNumber = 'KanjiVG_StrokeNumber';
  static const String pathGroup = 'KanjiVG_PathGroup';

  /// A newly built set with every table name declared above.
  static Set<String> get allTables {
    return <String>{
      version,
      entry,
      path,
      strokeNumber,
      pathGroup,
    };
  }
}
+45
View File
@@ -0,0 +1,45 @@
-- Version information for the imported KanjiVG data, keyed by "version".
CREATE TABLE "KanjiVG_Version" (
  "version" VARCHAR(10) NOT NULL,
  "date" DATE NOT NULL,
  "hash" VARCHAR(64) NOT NULL,
  PRIMARY KEY ("version")
) WITHOUT ROWID;
-- Rejects any insert into "KanjiVG_Version" once the table already holds a
-- row, so it can never contain more than one.
CREATE TRIGGER "KanjiVG_Version_SingleRow"
BEFORE INSERT ON "KanjiVG_Version"
WHEN EXISTS (SELECT 1 FROM "KanjiVG_Version")
BEGIN
  SELECT RAISE(FAIL, 'Only one row allowed in KanjiVG_Version');
END;
-- One row per character; the other KanjiVG tables reference this one.
CREATE TABLE "KanjiVG_Entry" (
  "character" CHAR(1) NOT NULL,
  PRIMARY KEY ("character")
) WITHOUT ROWID;
-- Stroke number labels of a character, one row per stroke number.
CREATE TABLE "KanjiVG_StrokeNumber" (
  "character" CHAR(1) NOT NULL,
  "strokeNum" INTEGER NOT NULL,
  "x" REAL NOT NULL,
  "y" REAL NOT NULL,
  PRIMARY KEY ("character", "strokeNum"),
  FOREIGN KEY ("character") REFERENCES "KanjiVG_Entry"("character")
) WITHOUT ROWID;
-- Paths of a character, one row per path id.
CREATE TABLE "KanjiVG_Path" (
  "character" CHAR(1) NOT NULL,
  "pathId" TEXT NOT NULL,
  "type" VARCHAR(10) NOT NULL,
  "svgPath" TEXT NOT NULL,
  PRIMARY KEY ("character", "pathId"),
  FOREIGN KEY ("character") REFERENCES "KanjiVG_Entry"("character")
) WITHOUT ROWID;
-- Path groups of a character, one row per group id. Groups form a tree:
-- "parentGroupId" points at the enclosing group and is NULL for a group
-- without a parent.
CREATE TABLE "KanjiVG_PathGroup" (
  "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
  "groupId" TEXT NOT NULL,
  "parentGroupId" TEXT,
  "element" TEXT,
  "original" TEXT,
  "position" VARCHAR(10),
  "radical" TEXT,
  "part" INTEGER,
  PRIMARY KEY ("character", "groupId"),
  -- The parent reference has to target the full primary key: "groupId" on its
  -- own is neither PRIMARY KEY nor UNIQUE, which SQLite rejects as a foreign
  -- key mismatch. A NULL "parentGroupId" leaves the constraint unchecked.
  FOREIGN KEY ("character", "parentGroupId")
    REFERENCES "KanjiVG_PathGroup"("character", "groupId"),
  CHECK ("position" IN ('bottom', 'kamae', 'kamaec', 'left', 'middle', 'nyo', 'nyoc', 'right', 'tare', 'tarec', 'top') OR "position" IS NULL)
) WITHOUT ROWID;
+3
View File
@@ -7,6 +7,7 @@
radkfile,
kanjidic2,
tanos-jlpt,
kanjivg,
sqlite,
wal ? false,
}:
@@ -39,6 +40,8 @@ stdenvNoCC.mkDerivation {
TANOS_JLPT_VERSION = tanos-jlpt.version;
TANOS_JLPT_DATE = tanos-jlpt.date;
TANOS_JLPT_HASH = tanos-jlpt.hash;
KANJIVG_PATH = "${kanjivg}/kanji";
};
buildPhase = ''