This commit is contained in:
Generated
+17
@@ -20,6 +20,22 @@
|
||||
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
|
||||
}
|
||||
},
|
||||
"kanjivg-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1775218066,
|
||||
"narHash": "sha256-iYv9xakgoGt/JwwdKDUCpSAF36hBtKlX9oN7xiLowjs=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "544d319f79348c092d567b662f27f33dacfa60cd",
|
||||
"revCount": 2215,
|
||||
"type": "git",
|
||||
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
|
||||
},
|
||||
"original": {
|
||||
"type": "git",
|
||||
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1779560665,
|
||||
@@ -38,6 +54,7 @@
|
||||
"root": {
|
||||
"inputs": {
|
||||
"datasources": "datasources",
|
||||
"kanjivg-src": "kanjivg-src",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,12 +8,18 @@
|
||||
url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
|
||||
kanjivg-src = {
|
||||
url = "git+https://git.pvv.ntnu.no/mugiten/kanjivg.git";
|
||||
flake = false;
|
||||
};
|
||||
};
|
||||
|
||||
outputs = {
|
||||
self,
|
||||
nixpkgs,
|
||||
datasources,
|
||||
kanjivg-src,
|
||||
}: let
|
||||
inherit (nixpkgs) lib;
|
||||
systems = [
|
||||
@@ -124,6 +130,7 @@
|
||||
|
||||
database = pkgs.callPackage ./nix/database.nix {
|
||||
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
|
||||
kanjivg = kanjivg-src;
|
||||
inherit (self.packages.${system}) database-tool;
|
||||
inherit src;
|
||||
};
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
import 'package:jadb/_data_ingestion/sql_writable.dart';
|
||||
|
||||
/// Values of the `kvg:position` attribute, used by `<g>` elements in the
/// KanjiVG SVG files.
///
/// The value names are what ends up in the database (the group node stores
/// `position?.name`), so they have to stay in sync with the `CHECK`
/// constraint on the `position` column of the `KanjiVG_PathGroup` table.
///
/// NOTE(review): `kamae`, `nyo` and `tare` look like the Japanese names for
/// kinds of enclosing components, with the `c` suffix marking a counterpart;
/// confirm against the KanjiVG documentation before relying on that.
enum KanjiPathGroupPosition {
  bottom,
  kamae,
  kamaec,
  left,
  middle,
  nyo,
  nyoc,
  right,
  tare,
  tarec,
  top,
}
|
||||
|
||||
/// Contents of a `<g>` element in the KanjiVG SVG files.
///
/// Groups nest, so every node carries its own attributes plus the list of its
/// direct [children].
///
/// NOTE(review): the nullable fields presumably mirror the optional `kvg:*`
/// attributes of the same name; confirm once the SVG parser is implemented.
class KanjiPathGroupTreeNode extends SQLWritable {
  /// Identifier of this group; written to the `groupId` column.
  final String id;

  /// Groups nested directly inside this one. Not part of [sqlValue].
  final List<KanjiPathGroupTreeNode> children;

  final String? element;
  final String? original;
  final KanjiPathGroupPosition? position;
  final String? radical;
  final int? part;

  KanjiPathGroupTreeNode({
    required this.id,
    this.children = const [],
    this.element,
    this.original,
    this.position,
    this.radical,
    this.part,
  });

  /// Column values for one row of the `KanjiVG_PathGroup` table.
  ///
  /// The keys match the column names of that table. `character` and
  /// `parentGroupId` are not known to the node itself and have to be added by
  /// the caller. [children] are not included.
  @override
  Map<String, Object?> get sqlValue => {
    // The column is called "groupId", not "id".
    'groupId': id,
    'element': element,
    'original': original,
    // Stored by name; the table restricts the column to the enum's names.
    'position': position?.name,
    'radical': radical,
    'part': part,
  };
}
|
||||
|
||||
/// Contents of a `<text>` element in the StrokeNumber's group in the KanjiVG
/// SVG files.
///
/// NOTE(review): [x] and [y] presumably hold the position of the number
/// label; confirm once the SVG parser is implemented.
class KanjiStrokeNumber extends SQLWritable {
  /// The stroke number; written to the `strokeNum` column.
  final int num;
  final double x;
  final double y;

  KanjiStrokeNumber(this.num, this.x, this.y);

  /// Column values for one row of the `KanjiVG_StrokeNumber` table.
  ///
  /// The keys match the column names of that table. The `character` column is
  /// not known to this object and has to be added by the caller.
  @override
  Map<String, Object?> get sqlValue => {
    // The column is called "strokeNum", not "num".
    'strokeNum': num,
    'x': x,
    'y': y,
  };
}
|
||||
|
||||
/// Contents of a `<path>` element in the KanjiVG SVG files.
///
/// NOTE(review): [type] presumably comes from the `kvg:type` attribute and
/// [svgPath] from the `d` attribute; confirm once the SVG parser is
/// implemented.
class KanjiVGPath extends SQLWritable {
  /// Identifier of this path; written to the `pathId` column.
  final String id;
  final String type;
  final String svgPath;

  KanjiVGPath({required this.id, required this.type, required this.svgPath});

  /// Column values for one row of the `KanjiVG_Path` table.
  ///
  /// The keys match the column names of that table. The `character` column is
  /// not known to this object and has to be added by the caller.
  @override
  Map<String, Object?> get sqlValue => {
    // The column is called "pathId", not "id".
    'pathId': id,
    'type': type,
    'svgPath': svgPath,
  };
}
|
||||
|
||||
/// Everything collected for a single character: its stroke [paths], the
/// [strokeNumbers] labels and the tree of [pathGroups].
class KanjiVGItem extends SQLWritable {
  final String character;
  final List<KanjiVGPath> paths;
  final List<KanjiStrokeNumber> strokeNumbers;
  final List<KanjiPathGroupTreeNode> pathGroups;

  KanjiVGItem({
    required this.character,
    required this.paths,
    required this.strokeNumbers,
    required this.pathGroups,
  });

  /// Column values for the entry row itself.
  ///
  /// Only [character] is included; [paths], [strokeNumbers] and [pathGroups]
  /// expose their own `sqlValue`.
  @override
  Map<String, Object?> get sqlValue {
    return {'character': character};
  }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
|
||||
|
||||
/// Parses the `.svg` files located directly inside [rootDir].
///
/// NOTE: converting an SVG document into a [KanjiVGItem] is not implemented
/// yet, so the returned list is currently always empty.
///
/// Throws a [FileSystemException] if [rootDir] cannot be listed or one of the
/// files cannot be read.
List<KanjiVGItem> parseKanjiVGData(Directory rootDir) {
  final items = <KanjiVGItem>[];

  // `listSync` does not document any ordering. Sorting by path keeps the log
  // output (and, once parsing exists, the item order) reproducible.
  final svgFiles =
      rootDir
          .listSync()
          .whereType<File>()
          .where((file) => file.path.endsWith('.svg'))
          .toList()
        ..sort((a, b) => a.path.compareTo(b.path));

  for (final file in svgFiles) {
    print('Parsing ${file.path}...');

    // The file is still read so unreadable input fails loudly, but its
    // content is no longer dumped to stdout for every single file.
    // ignore: unused_local_variable
    final rawSvg = file.readAsStringSync();

    // TODO: parse `rawSvg` into a KanjiVGItem and add it to `items`.
  }

  return items;
}
|
||||
@@ -0,0 +1,8 @@
|
||||
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
/// Writes [items] to [db] using a single batch.
///
/// NOTE: no statements are queued on the batch yet, so [items] is currently
/// unused and the committed batch is empty.
Future<void> seedKanjiVGData(Iterable<KanjiVGItem> items, Database db) async {
  final batch = db.batch();

  // Results are not needed, so the batch is committed without collecting them.
  await batch.commit(noResult: true);
}
|
||||
@@ -4,6 +4,8 @@ import 'package:jadb/_data_ingestion/jmdict/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/jmdict/xml_parser.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjidic/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjidic/xml_parser.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjivg/parser.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjivg/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/radkfile/parser.dart';
|
||||
import 'package:jadb/_data_ingestion/radkfile/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
|
||||
@@ -17,6 +19,7 @@ Future<void> seedData(Database db) async {
|
||||
await parseAndSeedDataFromRADKFILE(db);
|
||||
await parseAndSeedDataFromKANJIDIC(db);
|
||||
await parseAndSeedDataFromTanosJLPT(db);
|
||||
await parseAndSeedDataFromKanjiVG(db);
|
||||
|
||||
print('Performing VACUUM');
|
||||
await db.execute('VACUUM');
|
||||
@@ -102,3 +105,17 @@ Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
|
||||
print('[TANOS-JLPT] Writing to database...');
|
||||
await seedTanosJLPTData(resolvedEntries, db);
|
||||
}
|
||||
|
||||
/// Parses the KanjiVG data and writes the result to [db].
///
/// The data directory is read from the `KANJIVG_PATH` environment variable
/// and falls back to `data/kanjivg` when the variable is not set.
///
/// Throws an [Exception] if that directory does not exist.
Future<void> parseAndSeedDataFromKanjiVG(Database db) async {
  final kanjivgPath = Platform.environment['KANJIVG_PATH'] ?? 'data/kanjivg';
  final kanjivgDir = Directory(kanjivgPath);

  if (!kanjivgDir.existsSync()) {
    throw Exception('KANJIVG directory not found at $kanjivgPath');
  }

  print('[KANJIVG] Parsing content...');
  final items = parseKanjiVGData(kanjivgDir);

  print('[KANJIVG] Writing to database...');
  await seedKanjiVGData(items, db);
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:jadb/table_names/kanjivg.dart';
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:jadb/table_names/tanos_jlpt.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
@@ -21,6 +22,7 @@ Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
|
||||
...KANJIDICTableNames.allTables,
|
||||
...RADKFILETableNames.allTables,
|
||||
...TanosJLPTTableNames.allTables,
|
||||
...KanjiVGTableNames.allTables,
|
||||
};
|
||||
|
||||
final missingTables = expectedTables.difference(tables);
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
/// Names of the database tables that hold the KanjiVG data.
abstract class KanjiVGTableNames {
  static const String version = 'KanjiVG_Version';
  static const String entry = 'KanjiVG_Entry';
  static const String path = 'KanjiVG_Path';
  static const String strokeNumber = 'KanjiVG_StrokeNumber';
  static const String pathGroup = 'KanjiVG_PathGroup';

  /// All table names declared above, as a set.
  static Set<String> get allTables {
    return {
      version,
      entry,
      path,
      strokeNumber,
      pathGroup,
    };
  }
}
|
||||
@@ -0,0 +1,45 @@
|
||||
-- Metadata (version, date, hash) for the KanjiVG data.
-- The trigger below limits this table to a single row.
CREATE TABLE "KanjiVG_Version" (
  "version" VARCHAR(10) NOT NULL,
  "date" DATE NOT NULL,
  "hash" VARCHAR(64) NOT NULL,
  PRIMARY KEY ("version")
) WITHOUT ROWID;

-- Rejects an insert as soon as the table already contains a row.
CREATE TRIGGER "KanjiVG_Version_SingleRow"
BEFORE INSERT ON "KanjiVG_Version"
WHEN EXISTS (SELECT 1 FROM "KanjiVG_Version")
BEGIN
  SELECT RAISE(FAIL, 'Only one row allowed in KanjiVG_Version');
END;
|
||||
|
||||
-- One row per character that has KanjiVG data.
CREATE TABLE "KanjiVG_Entry" (
  "character" CHAR(1) NOT NULL,
  PRIMARY KEY ("character")
) WITHOUT ROWID;

-- Stroke number labels of a character, keyed by stroke number.
CREATE TABLE "KanjiVG_StrokeNumber" (
  "character" CHAR(1) NOT NULL,
  "strokeNum" INTEGER NOT NULL,
  "x" REAL NOT NULL,
  "y" REAL NOT NULL,
  PRIMARY KEY ("character", "strokeNum"),
  FOREIGN KEY ("character") REFERENCES "KanjiVG_Entry"("character")
) WITHOUT ROWID;

-- Paths of a character, keyed by path id.
CREATE TABLE "KanjiVG_Path" (
  "character" CHAR(1) NOT NULL,
  "pathId" TEXT NOT NULL,
  "type" VARCHAR(10) NOT NULL,
  "svgPath" TEXT NOT NULL,
  PRIMARY KEY ("character", "pathId"),
  FOREIGN KEY ("character") REFERENCES "KanjiVG_Entry"("character")
) WITHOUT ROWID;
|
||||
|
||||
-- Path groups of a character. A group may point at its parent group through
-- "parentGroupId"; top-level groups leave it NULL.
CREATE TABLE "KanjiVG_PathGroup" (
  "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
  "groupId" TEXT NOT NULL,
  "parentGroupId" TEXT,
  "element" TEXT,
  "original" TEXT,
  "position" VARCHAR(10),
  "radical" TEXT,
  "part" INTEGER,
  PRIMARY KEY ("character", "groupId"),
  -- The parent key must be the primary key or a UNIQUE column set. "groupId"
  -- alone is neither, so the self-reference uses the full composite key.
  -- Rows with a NULL "parentGroupId" are not checked.
  FOREIGN KEY ("character", "parentGroupId")
    REFERENCES "KanjiVG_PathGroup"("character", "groupId"),
  -- Allowed values mirror the names of the Dart enum KanjiPathGroupPosition.
  CHECK ("position" IN ('bottom', 'kamae', 'kamaec', 'left', 'middle', 'nyo', 'nyoc', 'right', 'tare', 'tarec', 'top') OR "position" IS NULL)
) WITHOUT ROWID;
|
||||
@@ -7,6 +7,7 @@
|
||||
radkfile,
|
||||
kanjidic2,
|
||||
tanos-jlpt,
|
||||
kanjivg,
|
||||
sqlite,
|
||||
wal ? false,
|
||||
}:
|
||||
@@ -39,6 +40,8 @@ stdenvNoCC.mkDerivation {
|
||||
TANOS_JLPT_VERSION = tanos-jlpt.version;
|
||||
TANOS_JLPT_DATE = tanos-jlpt.date;
|
||||
TANOS_JLPT_HASH = tanos-jlpt.hash;
|
||||
|
||||
KANJIVG_PATH = "${kanjivg}/kanji";
|
||||
};
|
||||
|
||||
buildPhase = ''
|
||||
|
||||
Reference in New Issue
Block a user