WIP: add kanjivg data
Build and test / build (push) Successful in 5m28s

This commit is contained in:
2026-03-03 13:47:59 +09:00
parent f0e919c397
commit 80d19543fb
10 changed files with 444 additions and 0 deletions
+168
View File
@@ -0,0 +1,168 @@
import 'package:jadb/_data_ingestion/sql_writable.dart';
/// Values of the `kvg:position` attribute carried by `<g>` elements in the
/// KanjiVG SVG files.
enum KanjiPathGroupPosition {
  upperA,
  upperB,
  lower1,
  lower2,
  bottom,
  kamae,
  kamaec,
  left,
  middle,
  nyo,
  nyoc,
  right,
  tare,
  tarec,
  top;

  /// Raw attribute text mapped to the enum value it denotes.
  static const Map<String, KanjiPathGroupPosition> _byAttributeValue = {
    '⿵A': upperA,
    '⿵B': upperB,
    '⿶1': lower1,
    '⿶2': lower2,
    'bottom': bottom,
    'kamae': kamae,
    'kamaec': kamaec,
    'left': left,
    'middle': middle,
    'nyo': nyo,
    'nyoc': nyoc,
    'right': right,
    'tare': tare,
    'tarec': tarec,
    'top': top,
  };

  /// Converts the raw attribute text [str] into a [KanjiPathGroupPosition].
  ///
  /// Returns `null` when [str] is `null` (attribute absent). Throws an
  /// [ArgumentError] when [str] is not one of the known attribute values.
  static KanjiPathGroupPosition? fromString(String? str) {
    if (str == null) return null;

    final position = _byAttributeValue[str];
    if (position == null) {
      throw ArgumentError('Unknown position: $str');
    }
    return position;
  }
}
/// Values accepted for the `kvg:radical` attribute of a KanjiVG `<g>` element.
enum KanjiVGRadical {
  general,
  jis,
  nelson,
  tradit;

  /// Converts the raw attribute text [str] into a [KanjiVGRadical].
  ///
  /// Returns `null` when [str] is `null`. Throws an [ArgumentError] when [str]
  /// does not match any value.
  static KanjiVGRadical? fromString(String? str) {
    if (str == null) return null;

    // Each accepted attribute string is spelled exactly like the enum value's
    // name, so a scan over the values is enough.
    for (final radical in values) {
      if (radical.name == str) return radical;
    }
    throw ArgumentError('Unknown radical: $str');
  }
}
/// One `<g>` element of a KanjiVG SVG file, together with its nested `<g>`
/// children.
class KanjiPathGroupTreeNode extends SQLWritable {
  /// Numeric id of this group; written to the `groupId` column.
  final int id;

  /// Groups nested directly inside this one.
  final List<KanjiPathGroupTreeNode> children;

  /// Written to the `element` column.
  final String? element;

  /// Written to the `original` column.
  final String? original;

  /// Written to the `position` column by enum name.
  final KanjiPathGroupPosition? position;

  /// Written to the `radical` column by enum name.
  final KanjiVGRadical? radical;

  /// Written to the `part` column.
  final int? part;

  // The remaining fields are carried on the node but are currently not part
  // of [sqlValue].
  final bool radicalForm;
  final bool tradForm;
  final bool partial;
  final String? variant;

  /// Creates a node; only [id] is mandatory, and the boolean flags default to
  /// `false`.
  KanjiPathGroupTreeNode({
    required this.id,
    this.children = const [],
    this.element,
    this.original,
    this.position,
    this.radical,
    this.part,
    this.variant,
    this.radicalForm = false,
    this.tradForm = false,
    this.partial = false,
  });

  /// Column values for this node alone; [children] are not included.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{
      'groupId': id,
      'element': element,
      'original': original,
      'position': position?.name,
      'radical': radical?.name,
      'part': part,
    };
  }
}
/// One `<text>` element from the StrokeNumbers group of a KanjiVG SVG file:
/// a number and the x/y coordinates attached to it.
class KanjiStrokeNumber extends SQLWritable {
  /// The number shown by the label; written to the `strokeNum` column.
  final int num;

  /// Horizontal coordinate of the label.
  final double x;

  /// Vertical coordinate of the label.
  final double y;

  KanjiStrokeNumber(this.num, this.x, this.y);

  /// Column values for this label.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{
      'strokeNum': num,
      'x': x,
      'y': y,
    };
  }
}
/// One `<path>` element of a KanjiVG SVG file.
class KanjiVGPath extends SQLWritable {
  /// Numeric id of the path; written to the `pathId` column.
  final int id;

  /// Optional type label of the path; written to the `type` column.
  final String? type;

  /// The SVG path data; written to the `svgPath` column.
  final String svgPath;

  KanjiVGPath({
    required this.id,
    required this.type,
    required this.svgPath,
  });

  /// Column values for this path.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{
      'pathId': id,
      'type': type,
      'svgPath': svgPath,
    };
  }
}
/// Everything gathered for one KanjiVG entry: its key plus the paths, stroke
/// numbers and path-group trees that belong to it.
class KanjiVGItem extends SQLWritable {
  /// Key identifying the entry; written to the `character` column.
  final String character;

  /// The entry's paths.
  final List<KanjiVGPath> paths;

  /// The entry's stroke-number labels.
  final List<KanjiStrokeNumber> strokeNumbers;

  /// Top-level path groups; nested groups hang off each node's `children`.
  final List<KanjiPathGroupTreeNode> pathGroups;

  KanjiVGItem({
    required this.character,
    required this.paths,
    required this.strokeNumbers,
    required this.pathGroups,
  });

  /// Column values for the entry row itself; the lists are not included.
  @override
  Map<String, Object?> get sqlValue {
    return <String, Object?>{'character': character};
  }
}
+100
View File
@@ -0,0 +1,100 @@
import 'dart:io';
import 'package:collection/collection.dart';
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
import 'package:xml/xml.dart';
/// Parses every `.svg` file directly inside [rootDir] into a [KanjiVGItem].
///
/// Entries that are not files, or whose path does not end in `.svg`, are
/// ignored. A group that is missing from a file yields empty lists for the
/// data derived from it.
List<KanjiVGItem> parseKanjiVGData(Directory rootDir) {
  // Finds the first `<g>` (in document order) whose id starts with [prefix].
  XmlElement? groupWithIdPrefix(XmlDocument svg, String prefix) {
    return svg
        .findAllElements('g')
        .firstWhereOrNull(
          (g) => g.getAttribute('id')?.startsWith(prefix) ?? false,
        );
  }

  final svgFiles = rootDir.listSync().whereType<File>().where(
    (entry) => entry.path.endsWith('.svg'),
  );

  final parsedItems = <KanjiVGItem>[];
  for (final svgFile in svgFiles) {
    final svg = XmlDocument.parse(svgFile.readAsStringSync());

    final strokePaths = groupWithIdPrefix(svg, 'kvg:StrokePaths');
    final strokeNumbersGroup = groupWithIdPrefix(svg, 'kvg:StrokeNumbers');

    final List<KanjiPathGroupTreeNode> pathGroups =
        strokePaths == null ? [] : _parsePathGroups(strokePaths);
    final List<KanjiStrokeNumber> strokeNumbers =
        strokeNumbersGroup == null
        ? []
        : _parseStrokeNumbers(strokeNumbersGroup);
    final List<KanjiVGPath> paths =
        strokePaths == null ? [] : _parsePaths(strokePaths);

    // The item key is the file name up to its first '.'.
    final fileName = svgFile.uri.pathSegments.last;
    parsedItems.add(
      KanjiVGItem(
        character: fileName.split('.').first,
        paths: paths,
        strokeNumbers: strokeNumbers,
        pathGroups: pathGroups,
      ),
    );
  }

  return parsedItems;
}
/// Turns each child element of [group] into a [KanjiStrokeNumber].
///
/// The child's text is parsed as the number, and the two values following
/// `matrix(1 0 0 1 ` in its `transform` attribute become the x and y
/// coordinates. Returns a fixed-length list.
List<KanjiStrokeNumber> _parseStrokeNumbers(XmlElement group) {
  const matrixPrefix = 'matrix(1 0 0 1 ';

  final labels = <KanjiStrokeNumber>[];
  for (final label in group.childElements) {
    final strokeNum = int.parse(label.innerText);

    final transform = label.getAttribute('transform')!;
    final afterPrefix = transform.split(matrixPrefix)[1];
    final coordinateText = afterPrefix.split(')')[0];
    final coordinates = coordinateText
        .split(' ')
        .map(double.parse)
        .toList();

    labels.add(KanjiStrokeNumber(strokeNum, coordinates[0], coordinates[1]));
  }
  return labels.toList(growable: false);
}
/// Builds a tree node for every `<g>` element that is a direct child of
/// [group], recursing into each one for its own nested groups.
///
/// Returns a fixed-length list.
List<KanjiPathGroupTreeNode> _parsePathGroups(XmlElement group) {
  final nodes = <KanjiPathGroupTreeNode>[];

  for (final child in group.findElements('g')) {
    // The id is read from the text after the last '-', minus its first
    // character.
    final idSuffix = child.getAttribute('id')!.split('-').last;
    // NOTE: the outermost group does not have a number, so it falls back to 0.
    final groupId = int.tryParse(idSuffix.substring(1)) ?? 0;

    final element = child.getAttribute('kvg:element');
    final original = child.getAttribute('kvg:original');
    final variant = child.getAttribute('kvg:variant');
    final position = KanjiPathGroupPosition.fromString(
      child.getAttribute('kvg:position'),
    );
    final radical = KanjiVGRadical.fromString(
      child.getAttribute('kvg:radical'),
    );
    final part = int.tryParse(child.getAttribute('kvg:part') ?? '');
    final isRadicalForm = child.getAttribute('kvg:radicalForm') == 'true';
    final isTradForm = child.getAttribute('kvg:tradForm') == 'true';
    final isPartial = child.getAttribute('kvg:partial') == 'true';

    nodes.add(
      KanjiPathGroupTreeNode(
        id: groupId,
        element: element,
        original: original,
        variant: variant,
        position: position,
        radical: radical,
        part: part,
        radicalForm: isRadicalForm,
        tradForm: isTradForm,
        partial: isPartial,
        children: _parsePathGroups(child),
      ),
    );
  }

  return nodes.toList(growable: false);
}
/// Collects every `<path>` element found anywhere below [group].
///
/// The path id is the integer that follows the first character of the last
/// '-'-separated piece of the `id` attribute. Both `id` and `d` must be
/// present. Returns a fixed-length list.
List<KanjiVGPath> _parsePaths(XmlElement group) {
  int pathIdOf(XmlElement pathElement) {
    final idSuffix = pathElement.getAttribute('id')!.split('-').last;
    return int.parse(idSuffix.substring(1));
  }

  final parsedPaths = [
    for (final pathElement in group.findAllElements('path'))
      KanjiVGPath(
        id: pathIdOf(pathElement),
        type: pathElement.getAttribute('kvg:type'),
        svgPath: pathElement.getAttribute('d')!,
      ),
  ];
  return parsedPaths.toList(growable: false);
}
@@ -0,0 +1,53 @@
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
import 'package:jadb/table_names/kanjivg.dart';
import 'package:sqflite_common/sqflite.dart';
/// Writes all [items] to [db] inside a single transaction.
///
/// For each item this queues the entry row, one row per path, one row per
/// stroke number, and the path-group trees. Every child row also carries the
/// item's `character`. All inserts go through one batch that is committed at
/// the end.
Future<void> seedKanjiVGData(Iterable<KanjiVGItem> items, Database db) =>
    db.transaction((txn) async {
      // SQLite: postpone foreign-key enforcement until the transaction commits.
      await txn.execute('PRAGMA defer_foreign_keys = ON');

      final batch = txn.batch();
      for (final item in items) {
        batch.insert(KanjiVGTableNames.entry, item.sqlValue);

        for (final path in item.paths) {
          batch.insert(KanjiVGTableNames.path, {
            ...path.sqlValue,
            'character': item.character,
          });
        }

        for (final strokeNumber in item.strokeNumbers) {
          batch.insert(KanjiVGTableNames.strokeNumber, {
            ...strokeNumber.sqlValue,
            'character': item.character,
          });
        }

        // Top-level groups have no parent.
        for (final rootGroup in item.pathGroups) {
          _insertPathGroup(batch, null, rootGroup, item.character);
        }
      }

      await batch.commit(noResult: true);
    });
/// Queues an insert for [node] on [b], then does the same for each of its
/// children, parent first.
///
/// [parentGroupId] is the id of the enclosing group, or `null` for a
/// top-level group. [character] is stored on every row.
void _insertPathGroup(
  Batch b,
  int? parentGroupId,
  KanjiPathGroupTreeNode node,
  String character,
) {
  final row = <String, Object?>{
    ...node.sqlValue,
    'character': character,
    'parentGroupId': parentGroupId,
  };
  b.insert(KanjiVGTableNames.pathGroup, row);

  final ownId = node.id;
  for (final nested in node.children) {
    _insertPathGroup(b, ownId, nested, character);
  }
}
+17
View File
@@ -4,6 +4,8 @@ import 'package:jadb/_data_ingestion/jmdict/seed_data.dart';
import 'package:jadb/_data_ingestion/jmdict/xml_parser.dart';
import 'package:jadb/_data_ingestion/kanjidic/seed_data.dart';
import 'package:jadb/_data_ingestion/kanjidic/xml_parser.dart';
import 'package:jadb/_data_ingestion/kanjivg/parser.dart';
import 'package:jadb/_data_ingestion/kanjivg/seed_data.dart';
import 'package:jadb/_data_ingestion/radkfile/parser.dart';
import 'package:jadb/_data_ingestion/radkfile/seed_data.dart';
import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart';
@@ -17,6 +19,7 @@ Future<void> seedData(Database db) async {
await parseAndSeedDataFromRADKFILE(db);
await parseAndSeedDataFromKANJIDIC(db);
await parseAndSeedDataFromTanosJLPT(db);
await parseAndSeedDataFromKanjiVG(db);
print('Performing VACUUM');
await db.execute('VACUUM');
@@ -102,3 +105,17 @@ Future<void> parseAndSeedDataFromTanosJLPT(Database db) async {
print('[TANOS-JLPT] Writing to database...');
await seedTanosJLPTData(resolvedEntries, db);
}
/// Parses the KanjiVG files and writes the result to [db].
///
/// The directory is taken from the `KANJIVG_PATH` environment variable and
/// falls back to `data/kanjivg`. Throws an [Exception] when that directory
/// does not exist.
Future<void> parseAndSeedDataFromKanjiVG(Database db) async {
  final kanjivgPath = Platform.environment['KANJIVG_PATH'] ?? 'data/kanjivg';
  final kanjivgDir = Directory(kanjivgPath);

  final directoryExists = kanjivgDir.existsSync();
  if (!directoryExists) {
    throw Exception('KANJIVG directory not found at $kanjivgPath');
  }

  print('[KANJIVG] Parsing content...');
  final parsedItems = parseKanjiVGData(kanjivgDir);

  print('[KANJIVG] Writing to database...');
  await seedKanjiVGData(parsedItems, db);
}
+2
View File
@@ -1,5 +1,6 @@
import 'package:jadb/table_names/jmdict.dart';
import 'package:jadb/table_names/kanjidic.dart';
import 'package:jadb/table_names/kanjivg.dart';
import 'package:jadb/table_names/radkfile.dart';
import 'package:jadb/table_names/tanos_jlpt.dart';
import 'package:sqflite_common/sqlite_api.dart';
@@ -21,6 +22,7 @@ Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
...KANJIDICTableNames.allTables,
...RADKFILETableNames.allTables,
...TanosJLPTTableNames.allTables,
...KanjiVGTableNames.allTables,
};
final missingTables = expectedTables.difference(tables);
+9
View File
@@ -0,0 +1,9 @@
/// Names of the database tables that belong to the KanjiVG data set.
abstract class KanjiVGTableNames {
  static const String version = 'KanjiVG_Version';
  static const String entry = 'KanjiVG_Entry';
  static const String path = 'KanjiVG_Path';
  static const String strokeNumber = 'KanjiVG_StrokeNumber';
  static const String pathGroup = 'KanjiVG_PathGroup';

  /// Every table name declared above, as a freshly built set.
  static Set<String> get allTables {
    return <String>{
      version,
      entry,
      path,
      strokeNumber,
      pathGroup,
    };
  }
}