Files
jadb/lib/_data_ingestion/kanjivg/parser.dart
T
oysteikt bbdb177fa4
Build and test / build (push) Successful in 10m0s
Add kanjivg data
2026-06-10 09:41:39 +09:00

113 lines
3.5 KiB
Dart

import 'dart:io';
import 'package:collection/collection.dart';
import 'package:jadb/_data_ingestion/kanjivg/objects.dart';
import 'package:xml/xml.dart';
/// Parses every `.svg` file located directly inside [rootDir] into a
/// [KanjiVGItem].
///
/// For each file, the first `<g>` element whose `id` starts with
/// `kvg:StrokePaths` supplies the path groups and paths, and the first `<g>`
/// whose `id` starts with `kvg:StrokeNumbers` supplies the stroke numbers.
/// When one of those groups is absent, the data derived from it is an empty
/// list. The item's `character` is the file name up to its first `.`.
///
/// Files are processed in ascending path order, so the returned list is the
/// same on every run regardless of the order in which the file system
/// enumerates the directory.
///
/// Nothing is caught here: errors raised while reading or parsing a file
/// propagate to the caller.
List<KanjiVGItem> parseKanjiVGData(Directory rootDir) {
  // `listSync` yields entries in file-system order; sort to make the
  // ingestion result reproducible.
  final svgFiles =
      rootDir
          .listSync()
          .whereType<File>()
          .where((file) => file.path.endsWith('.svg'))
          .toList()
        ..sort((a, b) => a.path.compareTo(b.path));

  final items = <KanjiVGItem>[];
  for (final file in svgFiles) {
    final doc = XmlDocument.parse(file.readAsStringSync());

    // First `<g>` in document order whose id starts with [prefix], if any.
    XmlElement? groupWithIdPrefix(String prefix) => doc
        .findAllElements('g')
        .firstWhereOrNull(
          (e) => e.getAttribute('id')?.startsWith(prefix) ?? false,
        );

    final strokePathsGroup = groupWithIdPrefix('kvg:StrokePaths');
    final strokeNumbersGroup = groupWithIdPrefix('kvg:StrokeNumbers');

    final pathGroups = strokePathsGroup != null
        ? _parsePathGroups(strokePathsGroup)
        : <KanjiPathGroupTreeNode>[];
    final strokeNumbers = strokeNumbersGroup != null
        ? _parseStrokeNumbers(strokeNumbersGroup)
        : <KanjiStrokeNumber>[];
    final paths = strokePathsGroup != null
        ? _parsePaths(strokePathsGroup)
        : <KanjiVGPath>[];

    items.add(
      KanjiVGItem(
        character: file.uri.pathSegments.last.split('.').first,
        paths: paths,
        strokeNumbers: strokeNumbers,
        pathGroups: pathGroups,
      ),
    );
  }
  return items;
}
/// Converts each child element of [group] into a [KanjiStrokeNumber].
///
/// The element text is parsed as the stroke number. The `transform`
/// attribute must contain `matrix(1 0 0 1 <x> <y>)`; the space-separated
/// values between that prefix and the next `)` are parsed as doubles, and the
/// first two are passed on as the number's coordinates.
///
/// Returns a fixed-length list. Throws if an element has no `transform`
/// attribute, if the attribute lacks the expected prefix, or if any of the
/// numeric fields fails to parse.
List<KanjiStrokeNumber> _parseStrokeNumbers(XmlElement group) {
  KanjiStrokeNumber toStrokeNumber(XmlElement label) {
    final strokeIndex = int.parse(label.innerText);

    final transform = label.getAttribute('transform')!;
    // Everything after the fixed matrix prefix, up to the closing paren.
    final afterPrefix = transform.split('matrix(1 0 0 1 ')[1];
    final translation = afterPrefix.split(')')[0];
    final coordinates = translation.split(' ').map(double.parse).toList();

    return KanjiStrokeNumber(strokeIndex, coordinates[0], coordinates[1]);
  }

  return group.childElements.map(toStrokeNumber).toList(growable: false);
}
/// Builds the tree of path groups from the direct `<g>` children of [group].
///
/// Each child `<g>` becomes one [KanjiPathGroupTreeNode] whose `children` are
/// produced by recursing into that element. The node id is the text after the
/// first character of the last `-`-separated segment of the element's `id`
/// attribute; when that text is not an integer the id falls back to `0`.
///
/// Returns a fixed-length list. Throws if a `<g>` has no `id` attribute.
List<KanjiPathGroupTreeNode> _parsePathGroups(XmlElement group) {
  // A flag attribute counts as set only when it is literally `true`.
  bool isTrue(XmlElement node, String attribute) =>
      node.getAttribute(attribute) == 'true';

  final nodes = <KanjiPathGroupTreeNode>[];
  for (final child in group.findElements('g')) {
    final idSuffix = child.getAttribute('id')!.split('-').last;
    nodes.add(
      KanjiPathGroupTreeNode(
        // NOTE: the outermost group does not have a number, so parsing
        // fails there and the id becomes 0.
        id: int.tryParse(idSuffix.substring(1)) ?? 0,
        element: child.getAttribute('kvg:element'),
        original: child.getAttribute('kvg:original'),
        variant: child.getAttribute('kvg:variant'),
        position: KanjiPathGroupPosition.fromString(
          child.getAttribute('kvg:position'),
        ),
        radical: KanjiVGRadical.fromString(child.getAttribute('kvg:radical')),
        part: int.tryParse(child.getAttribute('kvg:part') ?? ''),
        radicalForm: isTrue(child, 'kvg:radicalForm'),
        tradForm: isTrue(child, 'kvg:tradForm'),
        partial: isTrue(child, 'kvg:partial'),
        children: _parsePathGroups(child),
      ),
    );
  }
  return nodes.toList(growable: false);
}
/// Collects every `<path>` that is a direct child of a `<g>` nested anywhere
/// below [group].
///
/// Groups are visited in document order and each group contributes its own
/// direct `<path>` children before the next group is visited, so the result
/// is grouped by parent `<g>` rather than sorted by path id. Paths that are
/// direct children of [group] itself are not included.
///
/// Both ids are taken from the text after the first character of the last
/// `-`-separated segment of the `id` attribute. The path id must be an
/// integer; the group id falls back to `0` when it is not.
///
/// Returns a fixed-length list. Throws if a path lacks an `id` or `d`
/// attribute, if its id suffix is not an integer, or if the parent of a path
/// has no `id`.
List<KanjiVGPath> _parsePaths(XmlElement group) {
  String idSuffix(XmlElement node) =>
      node.getAttribute('id')!.split('-').last.substring(1);

  final paths = <KanjiVGPath>[];
  for (final parent in group.findAllElements('g')) {
    for (final stroke in parent.findElements('path')) {
      paths.add(
        KanjiVGPath(
          id: int.parse(idSuffix(stroke)),
          // Read per path so that a path-less group without an id is
          // never inspected.
          groupId: int.tryParse(idSuffix(parent)) ?? 0,
          type: stroke.getAttribute('kvg:type'),
          svgPath: stroke.getAttribute('d')!,
        ),
      );
    }
  }
  return paths.toList(growable: false);
}