This commit is contained in:
Generated
+17
@@ -35,6 +35,22 @@
|
||||
"url": "https://git.pvv.ntnu.no/Mugiten/datasources.git"
|
||||
}
|
||||
},
|
||||
"kanjivg-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1778620714,
|
||||
"narHash": "sha256-LwNcY5A6XPGI+DASZfmP7OeYe8IFesShhSrE7Go2ux8=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "1957802840a6f059d1e27dcb5755722955cc7dbb",
|
||||
"revCount": 2217,
|
||||
"type": "git",
|
||||
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
|
||||
},
|
||||
"original": {
|
||||
"type": "git",
|
||||
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
|
||||
}
|
||||
},
|
||||
"nix-sqlite": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
@@ -75,6 +91,7 @@
|
||||
"root": {
|
||||
"inputs": {
|
||||
"datasources": "datasources",
|
||||
"kanjivg-src": "kanjivg-src",
|
||||
"nixpkgs": "nixpkgs",
|
||||
"tamerye": "tamerye"
|
||||
}
|
||||
|
||||
@@ -13,6 +13,11 @@
|
||||
url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
|
||||
kanjivg-src = {
|
||||
url = "git+https://git.pvv.ntnu.no/mugiten/kanjivg.git";
|
||||
flake = false;
|
||||
};
|
||||
};
|
||||
|
||||
outputs = {
|
||||
@@ -20,6 +25,7 @@
|
||||
nixpkgs,
|
||||
tamerye,
|
||||
datasources,
|
||||
kanjivg-src,
|
||||
}: let
|
||||
inherit (nixpkgs) lib;
|
||||
systems = [
|
||||
@@ -139,6 +145,7 @@
|
||||
database = pkgs.callPackage ./nix/database.nix {
|
||||
sqlite = pkgs.tamerye-sqlite-cli;
|
||||
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
|
||||
kanjivg = kanjivg-src;
|
||||
inherit (self.packages.${system}) database-tool;
|
||||
inherit src;
|
||||
};
|
||||
@@ -146,6 +153,7 @@
|
||||
database-wal = pkgs.callPackage ./nix/database.nix {
|
||||
sqlite = pkgs.tamerye-sqlite-cli;
|
||||
inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt;
|
||||
kanjivg = kanjivg-src;
|
||||
inherit (self.packages.${system}) database-tool;
|
||||
inherit src;
|
||||
wal = true;
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
import 'package:jadb/_data_ingestion/sql_writable.dart';
|
||||
|
||||
/// Values of the `kvg:position` attribute carried by `<g>` elements in the
/// KanjiVG SVG files.
enum KanjiPathGroupPosition {
  upperA,
  upperB,
  lower1,
  lower2,
  bottom,
  kamae,
  kamaec,
  left,
  middle,
  nyo,
  nyoc,
  right,
  tare,
  tarec,
  top;

  /// Raw attribute spellings mapped to the enum value they denote.
  static const Map<String, KanjiPathGroupPosition> _byAttributeValue = {
    '⿵A': upperA,
    '⿵B': upperB,
    '⿶1': lower1,
    '⿶2': lower2,
    'bottom': bottom,
    'kamae': kamae,
    'kamaec': kamaec,
    'left': left,
    'middle': middle,
    'nyo': nyo,
    'nyoc': nyoc,
    'right': right,
    'tare': tare,
    'tarec': tarec,
    'top': top,
  };

  /// Converts a raw `kvg:position` attribute value into its enum value.
  ///
  /// Returns `null` when [str] is `null` (attribute absent). Throws an
  /// [ArgumentError] for any string that is not a known position.
  static KanjiPathGroupPosition? fromString(String? str) {
    if (str == null) return null;

    final match = _byAttributeValue[str];
    if (match == null) {
      throw ArgumentError('Unknown position: $str');
    }
    return match;
  }
}
|
||||
|
||||
/// Values of the `kvg:radical` attribute carried by `<g>` elements in the
/// KanjiVG SVG files.
enum KanjiVGRadical {
  general,
  jis,
  nelson,
  tradit;

  /// Raw attribute spellings mapped to the enum value they denote.
  static const Map<String, KanjiVGRadical> _byAttributeValue = {
    'general': general,
    'jis': jis,
    'nelson': nelson,
    'tradit': tradit,
  };

  /// Converts a raw `kvg:radical` attribute value into its enum value.
  ///
  /// Returns `null` when [str] is `null` (attribute absent). Throws an
  /// [ArgumentError] for any string that is not a known radical kind.
  static KanjiVGRadical? fromString(String? str) {
    if (str == null) return null;

    final match = _byAttributeValue[str];
    if (match == null) {
      throw ArgumentError('Unknown radical: $str');
    }
    return match;
  }
}
|
||||
|
||||
/// Contents of a `<g>` element in the KanjiVG SVG files.
///
/// Nodes nest through [children]. Only the columns listed in [sqlValue] are
/// written to the database; the remaining fields are parsed but not stored.
class KanjiPathGroupTreeNode extends SQLWritable {
  KanjiPathGroupTreeNode({
    required this.id,
    this.children = const [],
    this.element,
    this.original,
    this.position,
    this.radical,
    this.part,
    this.variant,
    this.radicalForm = false,
    this.tradForm = false,
    this.partial = false,
  });

  /// Numeric id of this group, written as the `groupId` column.
  final int id;

  /// Groups nested directly inside this one.
  final List<KanjiPathGroupTreeNode> children;

  /// Value of the `kvg:element` attribute, if present.
  final String? element;

  /// Value of the `kvg:original` attribute, if present.
  final String? original;

  /// Parsed `kvg:position` attribute, if present.
  final KanjiPathGroupPosition? position;

  /// Parsed `kvg:radical` attribute, if present.
  final KanjiVGRadical? radical;

  /// Value of the `kvg:part` attribute, if present.
  final int? part;

  // Currently unused data: kept on the node but not part of [sqlValue].
  final bool radicalForm;
  final bool tradForm;
  final bool partial;
  final String? variant;

  /// Column values for this group's row; enum fields are stored by name.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'groupId': id,
      'element': element,
      'original': original,
      'position': position?.name,
      'radical': radical?.name,
      'part': part,
    };
  }
}
|
||||
|
||||
/// Contents of a `<text>` element in the StrokeNumbers group of a KanjiVG
/// SVG file: a stroke number and the coordinates of its label.
class KanjiStrokeNumber extends SQLWritable {
  KanjiStrokeNumber(this.num, this.x, this.y);

  /// The stroke number shown by the label, written as the `strokeNum` column.
  final int num;

  /// Horizontal coordinate of the label.
  final double x;

  /// Vertical coordinate of the label.
  final double y;

  /// Column values for this label's row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'strokeNum': num,
      'x': x,
      'y': y,
    };
  }
}
|
||||
|
||||
/// Contents of a `<path>` element in the KanjiVG SVG files.
class KanjiVGPath extends SQLWritable {
  KanjiVGPath({
    required this.id,
    required this.type,
    required this.svgPath,
  });

  /// Numeric id of the path, written as the `pathId` column.
  final int id;

  /// Value of the `kvg:type` attribute, if present.
  final String? type;

  /// Value of the `d` attribute, i.e. the SVG path data.
  final String svgPath;

  /// Column values for this path's row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'pathId': id,
      'type': type,
      'svgPath': svgPath,
    };
  }
}
|
||||
|
||||
/// Everything parsed from a single KanjiVG SVG file.
///
/// Only [character] is part of this item's own row; [paths],
/// [strokeNumbers] and [pathGroups] are written to their own tables.
class KanjiVGItem extends SQLWritable {
  KanjiVGItem({
    required this.character,
    required this.paths,
    required this.strokeNumbers,
    required this.pathGroups,
  });

  /// Key identifying the entry, written as the `character` column.
  final String character;

  /// All `<path>` elements found in the file.
  final List<KanjiVGPath> paths;

  /// All stroke number labels found in the file.
  final List<KanjiStrokeNumber> strokeNumbers;

  /// Top-level path groups; nested groups hang off their `children`.
  final List<KanjiPathGroupTreeNode> pathGroups;

  /// Column values for the entry row.
  @override
  Map<String, Object?> get sqlValue {
    return {'character': character};
  }
}
|
||||
@@ -0,0 +1,100 @@
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
|
||||
import 'package:xml/xml.dart';
|
||||
|
||||
/// Parses every `.svg` file directly inside [rootDir] into a [KanjiVGItem].
///
/// Entries that are not files, or whose path does not end in `.svg`, are
/// ignored. A file without a `kvg:StrokePaths` or `kvg:StrokeNumbers` group
/// yields empty lists for the corresponding parts.
List<KanjiVGItem> parseKanjiVGData(Directory rootDir) {
  final parsedItems = <KanjiVGItem>[];

  for (final svgFile in rootDir.listSync().whereType<File>()) {
    if (!svgFile.path.endsWith('.svg')) continue;

    final document = XmlDocument.parse(svgFile.readAsStringSync());

    // First `<g>` anywhere in the document whose id starts with [idPrefix].
    XmlElement? findGroup(String idPrefix) => document
        .findAllElements('g')
        .firstWhereOrNull(
          (g) => g.getAttribute('id')?.startsWith(idPrefix) ?? false,
        );

    final strokePathsRoot = findGroup('kvg:StrokePaths');
    final strokeNumbersRoot = findGroup('kvg:StrokeNumbers');

    final groups = strokePathsRoot == null
        ? <KanjiPathGroupTreeNode>[]
        : _parsePathGroups(strokePathsRoot);

    final numbers = strokeNumbersRoot == null
        ? <KanjiStrokeNumber>[]
        : _parseStrokeNumbers(strokeNumbersRoot);

    final strokes = strokePathsRoot == null
        ? <KanjiVGPath>[]
        : _parsePaths(strokePathsRoot);

    // NOTE(review): `character` is the file name up to its first '.', not
    // something read from the SVG itself. If the data set names files by
    // code point rather than by the literal character, this will not be a
    // single character — confirm against the data set and the schema.
    final fileStem = svgFile.uri.pathSegments.last.split('.').first;

    parsedItems.add(
      KanjiVGItem(
        character: fileStem,
        paths: strokes,
        strokeNumbers: numbers,
        pathGroups: groups,
      ),
    );
  }

  return parsedItems;
}
|
||||
|
||||
/// Parses the child elements of a `kvg:StrokeNumbers` group.
///
/// Each child is expected to hold its stroke number as text and its position
/// in a `transform` attribute containing `matrix(1 0 0 1 <x> <y>)`.
///
/// Throws a [FormatException] when the stroke number is not an integer, when
/// the `transform` attribute is missing or does not contain the expected
/// matrix, or when fewer than two coordinates can be read from it.
List<KanjiStrokeNumber> _parseStrokeNumbers(XmlElement group) {
  const matrixPrefix = 'matrix(1 0 0 1 ';

  KanjiStrokeNumber parseLabel(XmlElement label) {
    final strokeNum = int.parse(label.innerText.trim());

    final transform = label.getAttribute('transform');
    final prefixAt = transform?.indexOf(matrixPrefix) ?? -1;
    if (transform == null || prefixAt < 0) {
      throw FormatException(
        'Stroke number $strokeNum: expected a transform containing '
        '"$matrixPrefix<x> <y>)"',
        transform,
      );
    }

    // The coordinates are whatever sits between the prefix and the next ')'.
    final argsStart = prefixAt + matrixPrefix.length;
    final closeAt = transform.indexOf(')', argsStart);
    final args = transform.substring(
      argsStart,
      closeAt < 0 ? transform.length : closeAt,
    );

    // Split on any run of whitespace so doubled or trailing spaces do not
    // produce empty tokens that `double.parse` would reject.
    final coords = args
        .split(RegExp(r'\s+'))
        .where((token) => token.isNotEmpty)
        .map(double.parse)
        .toList(growable: false);
    if (coords.length < 2) {
      throw FormatException(
        'Stroke number $strokeNum: expected two coordinates in transform',
        transform,
      );
    }

    return KanjiStrokeNumber(strokeNum, coords[0], coords[1]);
  }

  return group.childElements.map(parseLabel).toList(growable: false);
}
|
||||
|
||||
/// Builds the tree of `<g>` elements that are direct children of [group],
/// recursing into each of them for its own children.
List<KanjiPathGroupTreeNode> _parsePathGroups(XmlElement group) {
  KanjiPathGroupTreeNode toNode(XmlElement g) {
    // Whether the named attribute is literally the string 'true'.
    bool isSet(String attribute) => g.getAttribute(attribute) == 'true';

    // Text after the last '-' of the id, minus its first character.
    // NOTE: the outermost group does not have a number, so parsing fails
    // there and the id falls back to 0.
    final idSuffix = g.getAttribute('id')!.split('-').last.substring(1);

    return KanjiPathGroupTreeNode(
      id: int.tryParse(idSuffix) ?? 0,
      element: g.getAttribute('kvg:element'),
      original: g.getAttribute('kvg:original'),
      variant: g.getAttribute('kvg:variant'),
      position: KanjiPathGroupPosition.fromString(
        g.getAttribute('kvg:position'),
      ),
      radical: KanjiVGRadical.fromString(g.getAttribute('kvg:radical')),
      part: int.tryParse(g.getAttribute('kvg:part') ?? ''),
      radicalForm: isSet('kvg:radicalForm'),
      tradForm: isSet('kvg:tradForm'),
      partial: isSet('kvg:partial'),
      children: _parsePathGroups(g),
    );
  }

  return group.findElements('g').map(toNode).toList(growable: false);
}
|
||||
|
||||
/// Collects every `<path>` element nested anywhere below [group].
///
/// The path id is the integer found after the last '-' of the element's `id`
/// attribute, skipping that segment's first character. Both the `id` and `d`
/// attributes are required.
List<KanjiVGPath> _parsePaths(XmlElement group) {
  final strokes = group.findAllElements('path').map((p) {
    final idSuffix = p.getAttribute('id')!.split('-').last.substring(1);
    return KanjiVGPath(
      id: int.parse(idSuffix),
      type: p.getAttribute('kvg:type'),
      svgPath: p.getAttribute('d')!,
    );
  });

  return strokes.toList(growable: false);
}
|
||||
@@ -0,0 +1,53 @@
|
||||
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
|
||||
import 'package:jadb/table_names/kanjivg.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
/// Writes all [items] to [db] inside a single transaction.
///
/// Foreign key checks are deferred for the duration of the transaction, and
/// every insert is queued on one batch that is committed at the end.
Future<void> seedKanjiVGData(Iterable<KanjiVGItem> items, Database db) async {
  await db.transaction((txn) async {
    await txn.execute('PRAGMA defer_foreign_keys = ON');

    final batch = txn.batch();

    for (final item in items) {
      // Every child row carries the character of the entry it belongs to.
      final owner = {'character': item.character};

      batch.insert(KanjiVGTableNames.entry, item.sqlValue);

      for (final path in item.paths) {
        batch.insert(KanjiVGTableNames.path, {...path.sqlValue, ...owner});
      }

      for (final strokeNumber in item.strokeNumbers) {
        batch.insert(
          KanjiVGTableNames.strokeNumber,
          {...strokeNumber.sqlValue, ...owner},
        );
      }

      for (final rootGroup in item.pathGroups) {
        _insertPathGroup(batch, null, rootGroup, item.character);
      }
    }

    await batch.commit(noResult: true);
  });
}
|
||||
|
||||
/// Queues an insert for [node] on [b], then does the same for each of its
/// children with [node]'s id as their parent.
///
/// [parentGroupId] is `null` for a top-level group.
void _insertPathGroup(
  Batch b,
  int? parentGroupId,
  KanjiPathGroupTreeNode node,
  String character,
) {
  final row = <String, Object?>{
    ...node.sqlValue,
    'character': character,
    'parentGroupId': parentGroupId,
  };
  b.insert(KanjiVGTableNames.pathGroup, row);

  for (final childGroup in node.children) {
    _insertPathGroup(b, node.id, childGroup, character);
  }
}
|
||||
@@ -4,6 +4,8 @@ import 'package:jadb/_data_ingestion/jmdict/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/jmdict/xml_parser.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjidic/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjidic/xml_parser.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjivg/parser.dart';
|
||||
import 'package:jadb/_data_ingestion/kanjivg/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/radkfile/parser.dart';
|
||||
import 'package:jadb/_data_ingestion/radkfile/seed_data.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
|
||||
@@ -17,6 +19,7 @@ Future<void> seedData(Database db) async {
|
||||
await parseAndSeedDataFromRADKFILE(db);
|
||||
await parseAndSeedDataFromKANJIDIC(db);
|
||||
await parseAndSeedDataFromTanosJLPT(db);
|
||||
await parseAndSeedDataFromKanjiVG(db);
|
||||
|
||||
print('Performing VACUUM');
|
||||
await db.execute('VACUUM');
|
||||
@@ -102,3 +105,17 @@ Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
|
||||
print('[TANOS-JLPT] Writing to database...');
|
||||
await seedTanosJLPTData(resolvedEntries, db);
|
||||
}
|
||||
|
||||
/// Parses the KanjiVG SVG directory and writes its contents to [db].
///
/// The directory is taken from the `KANJIVG_PATH` environment variable,
/// falling back to `data/kanjivg`. Throws an [Exception] if it does not
/// exist.
Future<void> parseAndSeedDataFromKanjiVG(Database db) async {
  final kanjivgPath = Platform.environment['KANJIVG_PATH'] ?? 'data/kanjivg';
  final kanjivgDir = Directory(kanjivgPath);

  if (!kanjivgDir.existsSync()) {
    throw Exception('KANJIVG directory not found at $kanjivgPath');
  }

  print('[KANJIVG] Parsing content...');
  final items = parseKanjiVGData(kanjivgDir);

  print('[KANJIVG] Writing to database...');
  await seedKanjiVGData(items, db);
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:jadb/table_names/kanjivg.dart';
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:jadb/table_names/tanos_jlpt.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
@@ -21,6 +22,7 @@ Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
|
||||
...KANJIDICTableNames.allTables,
|
||||
...RADKFILETableNames.allTables,
|
||||
...TanosJLPTTableNames.allTables,
|
||||
...KanjiVGTableNames.allTables,
|
||||
};
|
||||
|
||||
final missingTables = expectedTables.difference(tables);
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
/// Names of the database tables that hold KanjiVG data.
abstract class KanjiVGTableNames {
  static const String version = 'KanjiVG_Version';
  static const String entry = 'KanjiVG_Entry';
  static const String path = 'KanjiVG_Path';
  static const String strokeNumber = 'KanjiVG_StrokeNumber';
  static const String pathGroup = 'KanjiVG_PathGroup';

  /// Every table name declared above.
  static Set<String> get allTables {
    return <String>{
      version,
      entry,
      path,
      strokeNumber,
      pathGroup,
    };
  }
}
|
||||
@@ -0,0 +1,67 @@
|
||||
-- Version metadata for the imported KanjiVG data.
CREATE TABLE "KanjiVG_Version" (
  "version" VARCHAR(10) PRIMARY KEY NOT NULL,
  "date" DATE NOT NULL,
  "hash" VARCHAR(64) NOT NULL
) WITHOUT ROWID;

-- Rejects any insert once the version table already holds a row.
CREATE TRIGGER "KanjiVG_Version_SingleRow"
BEFORE INSERT ON "KanjiVG_Version"
WHEN (SELECT COUNT(*) FROM "KanjiVG_Version") >= 1
BEGIN
  SELECT RAISE(FAIL, 'Only one row allowed in KanjiVG_Version');
END;

-- One row per parsed KanjiVG file.
CREATE TABLE "KanjiVG_Entry" (
  "character" CHAR(1) PRIMARY KEY NOT NULL
) WITHOUT ROWID;

-- Position of each stroke number label. A label's number must match the
-- "pathId" of a path of the same character. "KanjiVG_Path" is created further
-- down; SQLite accepts a foreign key to a table that does not exist yet.
CREATE TABLE "KanjiVG_StrokeNumber" (
  "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
  "strokeNum" INTEGER NOT NULL,
  "x" REAL NOT NULL,
  "y" REAL NOT NULL,
  PRIMARY KEY ("character", "strokeNum"),
  FOREIGN KEY ("character", "strokeNum") REFERENCES "KanjiVG_Path"("character", "pathId")
) WITHOUT ROWID;

-- SVG path data of each stroke.
CREATE TABLE "KanjiVG_Path" (
  "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
  "pathId" INTEGER NOT NULL,
  "type" VARCHAR(10),
  "svgPath" TEXT NOT NULL,
  PRIMARY KEY ("character", "pathId")
) WITHOUT ROWID;

-- Tree of path groups; "parentGroupId" is NULL for a top-level group.
CREATE TABLE "KanjiVG_PathGroup" (
  "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
  "groupId" INTEGER NOT NULL,
  -- INTEGER so the stored value has the same type as the "groupId" it
  -- references; a TEXT column would store the parent id as a string.
  "parentGroupId" INTEGER,
  "element" TEXT,
  "original" TEXT,
  "position" VARCHAR(6),
  "radical" TEXT,
  "part" INTEGER,
  PRIMARY KEY ("character", "groupId"),
  CHECK (
    "position" IN (
      'upperA',
      'upperB',
      'lower1',
      'lower2',
      'bottom',
      'kamae',
      'kamaec',
      'left',
      'middle',
      'nyo',
      'nyoc',
      'right',
      'tare',
      'tarec',
      'top'
    )
    OR
    "position" IS NULL
  ),
  FOREIGN KEY ("character", "parentGroupId") REFERENCES "KanjiVG_PathGroup"("character", "groupId")
) WITHOUT ROWID;
|
||||
@@ -7,6 +7,7 @@
|
||||
radkfile,
|
||||
kanjidic2,
|
||||
tanos-jlpt,
|
||||
kanjivg,
|
||||
sqlite,
|
||||
wal ? false,
|
||||
}:
|
||||
@@ -39,6 +40,8 @@ stdenvNoCC.mkDerivation {
|
||||
TANOS_JLPT_VERSION = tanos-jlpt.version;
|
||||
TANOS_JLPT_DATE = tanos-jlpt.date;
|
||||
TANOS_JLPT_HASH = tanos-jlpt.hash;
|
||||
|
||||
KANJIVG_PATH = "${kanjivg}/kanji";
|
||||
};
|
||||
|
||||
buildPhase = ''
|
||||
|
||||
Reference in New Issue
Block a user