From bbdb177fa4e936bffcf2be06962347cb3c61f8ef Mon Sep 17 00:00:00 2001 From: h7x4 Date: Tue, 3 Mar 2026 13:47:59 +0900 Subject: [PATCH] Add kanjivg data --- flake.lock | 17 ++ flake.nix | 8 + lib/_data_ingestion/kanjivg/objects.dart | 175 +++++++++++++++ lib/_data_ingestion/kanjivg/parser.dart | 112 ++++++++++ lib/_data_ingestion/kanjivg/seed_data.dart | 53 +++++ lib/_data_ingestion/seed_database.dart | 16 ++ lib/models/kanjivg/kanjivg_entry.dart | 53 +++++ lib/models/kanjivg/kanjivg_path.dart | 55 +++++ lib/models/kanjivg/kanjivg_path_group.dart | 100 +++++++++ .../kanjivg/kanjivg_path_group_position.dart | 41 ++++ lib/models/kanjivg/kanjivg_radical.dart | 20 ++ lib/models/verify_tables.dart | 2 + lib/search.dart | 12 ++ lib/search/kanji_vg_search.dart | 175 +++++++++++++++ lib/table_names/kanjivg.dart | 15 ++ lib/version.dart | 2 +- migrations/0011_KanjiVG.sql | 69 ++++++ nix/database.nix | 3 + test/models/kanjivg_test.dart | 201 ++++++++++++++++++ test/search/kanji_vg_search_test.dart | 67 ++++++ 20 files changed, 1195 insertions(+), 1 deletion(-) create mode 100644 lib/_data_ingestion/kanjivg/objects.dart create mode 100644 lib/_data_ingestion/kanjivg/parser.dart create mode 100644 lib/_data_ingestion/kanjivg/seed_data.dart create mode 100644 lib/models/kanjivg/kanjivg_entry.dart create mode 100644 lib/models/kanjivg/kanjivg_path.dart create mode 100644 lib/models/kanjivg/kanjivg_path_group.dart create mode 100644 lib/models/kanjivg/kanjivg_path_group_position.dart create mode 100644 lib/models/kanjivg/kanjivg_radical.dart create mode 100644 lib/search/kanji_vg_search.dart create mode 100644 lib/table_names/kanjivg.dart create mode 100644 migrations/0011_KanjiVG.sql create mode 100644 test/models/kanjivg_test.dart create mode 100644 test/search/kanji_vg_search_test.dart diff --git a/flake.lock b/flake.lock index 23f9256..4095788 100644 --- a/flake.lock +++ b/flake.lock @@ -35,6 +35,22 @@ "url": "https://git.pvv.ntnu.no/Mugiten/datasources.git" } }, + 
"kanjivg-src": { + "flake": false, + "locked": { + "lastModified": 1778620714, + "narHash": "sha256-LwNcY5A6XPGI+DASZfmP7OeYe8IFesShhSrE7Go2ux8=", + "ref": "refs/heads/master", + "rev": "1957802840a6f059d1e27dcb5755722955cc7dbb", + "revCount": 2217, + "type": "git", + "url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git" + }, + "original": { + "type": "git", + "url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git" + } + }, "nix-sqlite": { "inputs": { "nixpkgs": [ @@ -75,6 +91,7 @@ "root": { "inputs": { "datasources": "datasources", + "kanjivg-src": "kanjivg-src", "nixpkgs": "nixpkgs", "tamerye": "tamerye" } diff --git a/flake.nix b/flake.nix index 09c2019..6dc69d9 100644 --- a/flake.nix +++ b/flake.nix @@ -13,6 +13,11 @@ url = "git+https://git.pvv.ntnu.no/Mugiten/datasources.git"; inputs.nixpkgs.follows = "nixpkgs"; }; + + kanjivg-src = { + url = "git+https://git.pvv.ntnu.no/mugiten/kanjivg.git"; + flake = false; + }; }; outputs = { @@ -20,6 +25,7 @@ nixpkgs, tamerye, datasources, + kanjivg-src, }: let inherit (nixpkgs) lib; systems = [ @@ -139,6 +145,7 @@ database = pkgs.callPackage ./nix/database.nix { sqlite = pkgs.tamerye-sqlite-cli; inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt; + kanjivg = kanjivg-src; inherit (self.packages.${system}) database-tool; inherit src; }; @@ -146,6 +153,7 @@ database-wal = pkgs.callPackage ./nix/database.nix { sqlite = pkgs.tamerye-sqlite-cli; inherit (datasources.packages.${system}) jmdict radkfile kanjidic2 tanos-jlpt; + kanjivg = kanjivg-src; inherit (self.packages.${system}) database-tool; inherit src; wal = true; diff --git a/lib/_data_ingestion/kanjivg/objects.dart b/lib/_data_ingestion/kanjivg/objects.dart new file mode 100644 index 0000000..6741a24 --- /dev/null +++ b/lib/_data_ingestion/kanjivg/objects.dart @@ -0,0 +1,175 @@ +import 'package:jadb/_data_ingestion/sql_writable.dart'; + +/// Enum set in the kvg:position attribute, used by `<g>` elements in the KanjiVG SVG files. 
+enum KanjiPathGroupPosition { + upperA, + upperB, + lower1, + lower2, + bottom, + kamae, + kamaec, + left, + middle, + nyo, + nyoc, + right, + tare, + tarec, + top; + + static KanjiPathGroupPosition? fromString(String? str) { + if (str == null) return null; + switch (str) { + case '⿵A': + return KanjiPathGroupPosition.upperA; + case '⿵B': + return KanjiPathGroupPosition.upperB; + case '⿶1': + return KanjiPathGroupPosition.lower1; + case '⿶2': + return KanjiPathGroupPosition.lower2; + case 'bottom': + return KanjiPathGroupPosition.bottom; + case 'kamae': + return KanjiPathGroupPosition.kamae; + case 'kamaec': + return KanjiPathGroupPosition.kamaec; + case 'left': + return KanjiPathGroupPosition.left; + case 'middle': + return KanjiPathGroupPosition.middle; + case 'nyo': + return KanjiPathGroupPosition.nyo; + case 'nyoc': + return KanjiPathGroupPosition.nyoc; + case 'right': + return KanjiPathGroupPosition.right; + case 'tare': + return KanjiPathGroupPosition.tare; + case 'tarec': + return KanjiPathGroupPosition.tarec; + case 'top': + return KanjiPathGroupPosition.top; + default: + throw ArgumentError('Unknown position: $str'); + } + } +} + +enum KanjiVGRadical { + general, + jis, + nelson, + tradit; + + static KanjiVGRadical? fromString(String? str) { + if (str == null) return null; + switch (str) { + case 'general': + return KanjiVGRadical.general; + case 'jis': + return KanjiVGRadical.jis; + case 'nelson': + return KanjiVGRadical.nelson; + case 'tradit': + return KanjiVGRadical.tradit; + default: + throw ArgumentError('Unknown radical: $str'); + } + } +} + +/// Contents of a `<g>` element in the KanjiVG SVG files. +class KanjiPathGroupTreeNode extends SQLWritable { + final int id; + final List children; + final String? element; + final String? original; + final KanjiPathGroupPosition? position; + final KanjiVGRadical? radical; + final int? part; + + // Currently unused data. + final bool radicalForm; + final bool tradForm; + final bool partial; + final String? 
variant; + + KanjiPathGroupTreeNode({ + required this.id, + this.children = const [], + this.element, + this.original, + this.position, + this.radical, + this.part, + + this.variant, + this.radicalForm = false, + this.tradForm = false, + this.partial = false, + }); + + @override + Map get sqlValue => { + 'groupId': id, + 'element': element, + 'original': original, + 'position': position?.name, + 'radical': radical?.name, + 'part': part, + }; +} + +/// Contents of a `<text>` element in the StrokeNumbers group in the KanjiVG SVG files +class KanjiStrokeNumber extends SQLWritable { + final int num; + final double x; + final double y; + + KanjiStrokeNumber(this.num, this.x, this.y); + + @override + Map get sqlValue => {'strokeNum': num, 'x': x, 'y': y}; +} + +/// Contents of a `<path>` element in the KanjiVG SVG files +class KanjiVGPath extends SQLWritable { + final int id; + final int groupId; + final String? type; + final String svgPath; + + KanjiVGPath({ + required this.id, + required this.groupId, + required this.type, + required this.svgPath, + }); + + @override + Map get sqlValue => { + 'pathId': id, + 'groupId': groupId, + 'type': type, + 'svgPath': svgPath, + }; +} + +class KanjiVGItem extends SQLWritable { + final String character; + final List paths; + final List strokeNumbers; + final List pathGroups; + + KanjiVGItem({ + required this.character, + required this.paths, + required this.strokeNumbers, + required this.pathGroups, + }); + + @override + Map get sqlValue => {'character': character}; +} diff --git a/lib/_data_ingestion/kanjivg/parser.dart b/lib/_data_ingestion/kanjivg/parser.dart new file mode 100644 index 0000000..6a70e1d --- /dev/null +++ b/lib/_data_ingestion/kanjivg/parser.dart @@ -0,0 +1,112 @@ +import 'dart:io'; + +import 'package:collection/collection.dart'; +import 'package:jadb/_data_ingestion/kanjivg/objects.dart'; +import 'package:xml/xml.dart'; + +List parseKanjiVGData(Directory rootDir) { + final List items = []; + + for (final file in 
rootDir.listSync()) { + if (file is File && file.path.endsWith('.svg')) { + final String rawSVG = file.readAsStringSync(); + final XmlDocument doc = XmlDocument.parse(rawSVG); + + final strokePathsGroup = doc + .findAllElements('g') + .firstWhereOrNull( + (e) => e.getAttribute('id')?.startsWith('kvg:StrokePaths') ?? false, + ); + + final strokeNumbersGroup = doc + .findAllElements('g') + .firstWhereOrNull( + (e) => + e.getAttribute('id')?.startsWith('kvg:StrokeNumbers') ?? false, + ); + + final pathGroups = strokePathsGroup != null + ? _parsePathGroups(strokePathsGroup) + : []; + + final strokeNumbers = strokeNumbersGroup != null + ? _parseStrokeNumbers(strokeNumbersGroup) + : []; + + final paths = strokePathsGroup != null + ? _parsePaths(strokePathsGroup) + : []; + + items.add( + KanjiVGItem( + character: file.uri.pathSegments.last.split('.').first, + paths: paths, + strokeNumbers: strokeNumbers, + pathGroups: pathGroups, + ), + ); + } + } + + return items; +} + +List _parseStrokeNumbers(XmlElement group) => group + .childElements + .map((e) { + final num = int.parse(e.innerText); + final xy = e + .getAttribute('transform')! + .split('matrix(1 0 0 1 ')[1] + .split(')')[0] + .split(' ') + .map(double.parse) + .toList(); + return KanjiStrokeNumber(num, xy[0], xy[1]); + }) + .toList(growable: false); + +List _parsePathGroups(XmlElement group) => group + .findElements('g') + .map((e) { + return KanjiPathGroupTreeNode( + // NOTE: the outermost group does not have a number + id: + int.tryParse(e.getAttribute('id')!.split('-').last.substring(1)) ?? + 0, + element: e.getAttribute('kvg:element'), + original: e.getAttribute('kvg:original'), + variant: e.getAttribute('kvg:variant'), + position: KanjiPathGroupPosition.fromString( + e.getAttribute('kvg:position'), + ), + radical: KanjiVGRadical.fromString(e.getAttribute('kvg:radical')), + part: int.tryParse(e.getAttribute('kvg:part') ?? 
''), + radicalForm: e.getAttribute('kvg:radicalForm') == 'true', + tradForm: e.getAttribute('kvg:tradForm') == 'true', + partial: e.getAttribute('kvg:partial') == 'true', + children: _parsePathGroups(e), + ); + }) + .toList(growable: false); + +List _parsePaths(XmlElement group) => group + .findAllElements('g') + .map( + (g) => g + .findElements('path') + .map( + (e) => KanjiVGPath( + id: int.parse(e.getAttribute('id')!.split('-').last.substring(1)), + groupId: + int.tryParse( + g.getAttribute('id')!.split('-').last.substring(1), + ) ?? + 0, + type: e.getAttribute('kvg:type'), + svgPath: e.getAttribute('d')!, + ), + ), + ) + .expand((x) => x) + .toList(growable: false); diff --git a/lib/_data_ingestion/kanjivg/seed_data.dart b/lib/_data_ingestion/kanjivg/seed_data.dart new file mode 100644 index 0000000..5d2fe29 --- /dev/null +++ b/lib/_data_ingestion/kanjivg/seed_data.dart @@ -0,0 +1,53 @@ +import 'package:jadb/_data_ingestion/kanjivg/objects.dart'; +import 'package:jadb/table_names/kanjivg.dart'; +import 'package:sqflite_common/sqflite.dart'; + +Future seedKanjiVGData(Iterable items, Database db) { + return db.transaction((txn) async { + await txn.execute('PRAGMA defer_foreign_keys = ON'); + + final b = txn.batch(); + + for (final item in items) { + b.insert(KanjiVGTableNames.entry, item.sqlValue); + + for (final path in item.paths) { + b.insert( + KanjiVGTableNames.path, + path.sqlValue..addAll({'character': item.character}), + ); + } + + for (final strokeNumber in item.strokeNumbers) { + b.insert( + KanjiVGTableNames.strokeNumber, + strokeNumber.sqlValue..addAll({'character': item.character}), + ); + } + + for (final pathGroup in item.pathGroups) { + _insertPathGroup(b, null, pathGroup, item.character); + } + } + + await b.commit(noResult: true); + }); +} + +/// Recursively insert path groups and their children +void _insertPathGroup( + Batch b, + int? 
parentGroupId, + KanjiPathGroupTreeNode node, + String character, +) { + b.insert( + KanjiVGTableNames.pathGroup, + node.sqlValue + ..addAll({'character': character, 'parentGroupId': parentGroupId}), + ); + + for (final child in node.children) { + _insertPathGroup(b, node.id, child, character); + } +} diff --git a/lib/_data_ingestion/seed_database.dart b/lib/_data_ingestion/seed_database.dart index 610a292..70e4a97 100644 --- a/lib/_data_ingestion/seed_database.dart +++ b/lib/_data_ingestion/seed_database.dart @@ -4,6 +4,8 @@ import 'package:jadb/_data_ingestion/jmdict/seed_data.dart'; import 'package:jadb/_data_ingestion/jmdict/xml_parser.dart'; import 'package:jadb/_data_ingestion/kanjidic/seed_data.dart'; import 'package:jadb/_data_ingestion/kanjidic/xml_parser.dart'; +import 'package:jadb/_data_ingestion/kanjivg/parser.dart'; +import 'package:jadb/_data_ingestion/kanjivg/seed_data.dart'; import 'package:jadb/_data_ingestion/radkfile/parser.dart'; import 'package:jadb/_data_ingestion/radkfile/seed_data.dart'; import 'package:jadb/_data_ingestion/tanos-jlpt/csv_parser.dart'; @@ -17,6 +19,7 @@ Future seedData(Database db) async { await parseAndSeedDataFromRADKFILE(db); await parseAndSeedDataFromKANJIDIC(db); await parseAndSeedDataFromTanosJLPT(db); + await parseAndSeedDataFromKanjiVG(db); print('Performing VACUUM'); await db.execute('VACUUM'); @@ -102,3 +105,16 @@ Future parseAndSeedDataFromTanosJLPT(Database db) async { print('[TANOS-JLPT] Writing to database...'); await seedTanosJLPTData(resolvedEntries, db); } + +Future parseAndSeedDataFromKanjiVG(Database db) async { + final kanjivgPath = Platform.environment['KANJIVG_PATH'] ?? 
'data/kanjivg'; + if (!Directory(kanjivgPath).existsSync()) { + throw Exception('KANJIVG directory not found at $kanjivgPath'); + } + + print('[KANJIVG] Parsing content...'); + final items = parseKanjiVGData(Directory(kanjivgPath)); + + print('[KANJIVG] Writing to database...'); + await seedKanjiVGData(items, db); +} diff --git a/lib/models/kanjivg/kanjivg_entry.dart b/lib/models/kanjivg/kanjivg_entry.dart new file mode 100644 index 0000000..67ef441 --- /dev/null +++ b/lib/models/kanjivg/kanjivg_entry.dart @@ -0,0 +1,53 @@ +import 'package:equatable/equatable.dart'; + +import 'kanjivg_path.dart'; +import 'kanjivg_path_group.dart'; + +/// A full KanjiVG entry for a single character. +class KanjiVGEntry extends Equatable { + /// The kanji or character this entry belongs to. + final String character; + + /// All stroke paths in drawing order. + /// + /// Each path includes the rendered position of its stroke label. + final List paths; + + /// The hierarchical group structure of the entry. + /// + /// These are not really used in mugiten at the moment, so querying them is optional. + final List? pathGroups; + + KanjiVGEntry({ + required this.character, + this.paths = const [], + this.pathGroups = const [], + }) : assert( + paths.isEmpty || + (paths.first.pathId == 1 && + paths.last.pathId == paths.length && + paths.every((p) => p.pathId > 0)), + 'Paths must be listed in a strictly growing order without holes, starting from pathId 1.', + ); + + @override + List get props => [character, paths, pathGroups]; + + Map toJson() => { + 'character': character, + 'paths': paths.map((e) => e.toJson()).toList(), + 'pathGroups': pathGroups?.map((e) => e.toJson()).toList(), + }; + + factory KanjiVGEntry.fromJson(Map json) => KanjiVGEntry( + character: json['character'] as String, + paths: ((json['paths'] as List?) ?? 
const []) + .map((e) => KanjiVGPath.fromJson(Map.from(e as Map))) + .toList(), + pathGroups: ((json['pathGroups'] as List?)) + ?.map( + (e) => KanjiVGPathGroup.fromJson(Map.from(e as Map)), + ) + .toList(), + ); +} diff --git a/lib/models/kanjivg/kanjivg_path.dart b/lib/models/kanjivg/kanjivg_path.dart new file mode 100644 index 0000000..c286026 --- /dev/null +++ b/lib/models/kanjivg/kanjivg_path.dart @@ -0,0 +1,55 @@ +import 'package:equatable/equatable.dart'; + +/// A stroke path from a KanjiVG entry. +class KanjiVGPath extends Equatable { + /// The path id within the KanjiVG entry. + final int pathId; + + /// The optional KanjiVG stroke type. + final String? type; + + /// The raw SVG `d` path string. + final String svgPath; + + /// The x-coordinate of the rendered stroke-label position. + final double labelX; + + /// The y-coordinate of the rendered stroke-label position. + final double labelY; + + KanjiVGPath({ + required this.pathId, + required this.type, + required this.svgPath, + required this.labelX, + required this.labelY, + }) : assert(pathId > 0, 'pathId must be a positive integer. Found $pathId.'), + assert(svgPath.isNotEmpty, 'svgPath cannot be empty.'), + assert( + labelX.isFinite, + 'labelX must be a finite number. Found $labelX.', + ), + assert( + labelY.isFinite, + 'labelY must be a finite number. 
Found $labelY.', + ); + + @override + List get props => [pathId, type, svgPath, labelX, labelY]; + + Map toJson() => { + 'pathId': pathId, + 'type': type, + 'svgPath': svgPath, + 'labelX': labelX, + 'labelY': labelY, + }; + + factory KanjiVGPath.fromJson(Map json) => KanjiVGPath( + pathId: json['pathId'] as int, + type: json['type'] as String?, + svgPath: json['svgPath'] as String, + labelX: (json['labelX'] as num).toDouble(), + labelY: (json['labelY'] as num).toDouble(), + ); +} diff --git a/lib/models/kanjivg/kanjivg_path_group.dart b/lib/models/kanjivg/kanjivg_path_group.dart new file mode 100644 index 0000000..a31c142 --- /dev/null +++ b/lib/models/kanjivg/kanjivg_path_group.dart @@ -0,0 +1,100 @@ +import 'package:equatable/equatable.dart'; +import 'package:jadb/models/kanjivg/kanjivg_path.dart'; + +import 'kanjivg_path_group_position.dart'; +import 'kanjivg_radical.dart'; + +/// A hierarchical path-group from a KanjiVG entry. +class KanjiVGPathGroup extends Equatable { + /// The path-group id within the entry. + final int groupId; + + /// The paths directly contained in this group, in drawing order. + final List paths; + + /// Nested child groups. + final List children; + + /// The value of the `kvg:element` attribute, if present. + final String? element; + + /// The original element before simplification, if present. + final String? original; + + /// Relative position of the group inside the character layout. + final KanjiVGPathGroupPosition? position; + + /// Radical classification for the group. + final KanjiVGRadical? radical; + + /// Part number for repeated elements, if present. + final int? part; + + KanjiVGPathGroup({ + required this.groupId, + this.paths = const [], + this.children = const [], + this.element, + this.original, + this.position, + this.radical, + this.part, + }) : assert( + groupId >= 0, + 'groupId must be a non-negative integer. 
Found $groupId.', + ), + assert( + paths.isEmpty || + paths.fold( + 0, + (previousMax, path) => path.pathId > previousMax + ? path.pathId + : throw ArgumentError( + 'Paths must be listed in a strictly growing order without holes. Found pathId ${path.pathId} after $previousMax.', + ), + ) == + paths.lastOrNull?.pathId, + ); + + @override + List get props => [ + groupId, + paths, + children, + element, + original, + position, + radical, + part, + ]; + + Map toJson() => { + 'groupId': groupId, + 'paths': paths.map((e) => e.toJson()).toList(), + 'children': children.map((e) => e.toJson()).toList(), + 'element': element, + 'original': original, + 'position': position?.toJson(), + 'radical': radical?.toJson(), + 'part': part, + }; + + factory KanjiVGPathGroup.fromJson( + Map json, + ) => KanjiVGPathGroup( + groupId: json['groupId'] as int, + paths: ((json['paths'] as List?) ?? const []) + .map((e) => KanjiVGPath.fromJson(Map.from(e as Map))) + .toList(), + children: ((json['children'] as List?) ?? const []) + .map( + (e) => KanjiVGPathGroup.fromJson(Map.from(e as Map)), + ) + .toList(), + element: json['element'] as String?, + original: json['original'] as String?, + position: KanjiVGPathGroupPosition.fromJson(json['position']), + radical: KanjiVGRadical.fromJson(json['radical']), + part: json['part'] as int?, + ); +} diff --git a/lib/models/kanjivg/kanjivg_path_group_position.dart b/lib/models/kanjivg/kanjivg_path_group_position.dart new file mode 100644 index 0000000..90c5ff5 --- /dev/null +++ b/lib/models/kanjivg/kanjivg_path_group_position.dart @@ -0,0 +1,41 @@ +/// Relative position tags used by KanjiVG path-groups. +/// +/// In the original SVG files these come from the `kvg:position` attribute. +/// The database stores the normalized enum name, while [svgValue] contains the +/// raw KanjiVG attribute value. 
+enum KanjiVGPathGroupPosition { + upperA(svgValue: '⿵A'), + upperB(svgValue: '⿵B'), + lower1(svgValue: '⿶1'), + lower2(svgValue: '⿶2'), + bottom(svgValue: 'bottom'), + kamae(svgValue: 'kamae'), + kamaec(svgValue: 'kamaec'), + left(svgValue: 'left'), + middle(svgValue: 'middle'), + nyo(svgValue: 'nyo'), + nyoc(svgValue: 'nyoc'), + right(svgValue: 'right'), + tare(svgValue: 'tare'), + tarec(svgValue: 'tarec'), + top(svgValue: 'top'); + + final String svgValue; + + const KanjiVGPathGroupPosition({required this.svgValue}); + + /// Parses either the normalized enum name stored in the database/JSON, or + /// the raw KanjiVG SVG attribute value. + static KanjiVGPathGroupPosition fromString(String value) => values.firstWhere( + (e) => e.name == value || e.svgValue == value, + orElse: () => throw Exception('Unknown position: $value'), + ); + + static KanjiVGPathGroupPosition? fromNullableString(String? value) => + value == null ? null : fromString(value); + + Object? toJson() => name; + + static KanjiVGPathGroupPosition? fromJson(Object? json) => + fromNullableString(json as String?); +} diff --git a/lib/models/kanjivg/kanjivg_radical.dart b/lib/models/kanjivg/kanjivg_radical.dart new file mode 100644 index 0000000..4e455ab --- /dev/null +++ b/lib/models/kanjivg/kanjivg_radical.dart @@ -0,0 +1,20 @@ +/// Radical classification tags used by KanjiVG path-groups. +enum KanjiVGRadical { + general, + jis, + nelson, + tradit; + + static KanjiVGRadical fromString(String value) => values.firstWhere( + (e) => e.name == value, + orElse: () => throw Exception('Unknown radical: $value'), + ); + + static KanjiVGRadical? fromNullableString(String? value) => + value == null ? null : fromString(value); + + Object? toJson() => name; + + static KanjiVGRadical? fromJson(Object? 
json) => + fromNullableString(json as String?); +} diff --git a/lib/models/verify_tables.dart b/lib/models/verify_tables.dart index fe5c955..23a5559 100644 --- a/lib/models/verify_tables.dart +++ b/lib/models/verify_tables.dart @@ -1,5 +1,6 @@ import 'package:jadb/table_names/jmdict.dart'; import 'package:jadb/table_names/kanjidic.dart'; +import 'package:jadb/table_names/kanjivg.dart'; import 'package:jadb/table_names/radkfile.dart'; import 'package:jadb/table_names/tanos_jlpt.dart'; import 'package:sqflite_common/sqlite_api.dart'; @@ -21,6 +22,7 @@ Future verifyTablesWithDbConnection(DatabaseExecutor db) async { ...KANJIDICTableNames.allTables, ...RADKFILETableNames.allTables, ...TanosJLPTTableNames.allTables, + ...KanjiVGTableNames.allTables, }; final missingTables = expectedTables.difference(tables); diff --git a/lib/search.dart b/lib/search.dart index 66956d7..6045e89 100644 --- a/lib/search.dart +++ b/lib/search.dart @@ -1,9 +1,11 @@ import 'package:jadb/const_data/radicals.dart'; import 'package:jadb/models/kanji_search/kanji_search_result.dart'; +import 'package:jadb/models/kanjivg/kanjivg_entry.dart'; import 'package:jadb/models/verify_tables.dart'; import 'package:jadb/models/word_search/word_search_result.dart'; import 'package:jadb/search/filter_kanji.dart'; import 'package:jadb/search/kanji_search.dart'; +import 'package:jadb/search/kanji_vg_search.dart'; import 'package:jadb/search/radical_search.dart'; import 'package:jadb/search/versions.dart'; import 'package:jadb/search/word_search/word_search.dart'; @@ -24,6 +26,16 @@ extension JaDBConnection on DatabaseExecutor { Iterable kanji, ) => searchManyKanjiWithDbConnection(this, kanji); + /// Search for a KanjiVG graph in the database. 
+ Future jadbSearchKanjiVGGraph( + String kanji, { + bool includePathGroups = false, + }) => searchKanjiVGGraphWithDbConnection( + this, + kanji, + includePathGroups: includePathGroups, + ); + /// Filter a list of characters, and return the ones that are listed in the kanji dictionary. Future> filterKanji( Iterable kanji, { diff --git a/lib/search/kanji_vg_search.dart b/lib/search/kanji_vg_search.dart new file mode 100644 index 0000000..b35bc0a --- /dev/null +++ b/lib/search/kanji_vg_search.dart @@ -0,0 +1,175 @@ +import 'package:jadb/models/kanjivg/kanjivg_entry.dart'; +import 'package:jadb/models/kanjivg/kanjivg_path.dart'; +import 'package:jadb/models/kanjivg/kanjivg_path_group.dart'; +import 'package:jadb/models/kanjivg/kanjivg_path_group_position.dart'; +import 'package:jadb/models/kanjivg/kanjivg_radical.dart'; +import 'package:jadb/table_names/kanjivg.dart'; +import 'package:sqflite_common/sqlite_api.dart'; + +Future>> _entryQuery( + DatabaseExecutor connection, + String entryKey, +) => connection.rawQuery( + ''' + SELECT * + FROM "${KanjiVGTableNames.entry}" + WHERE "character" = ? + OR "character" LIKE ? + ORDER BY "character" != ?, "character" + LIMIT 1 + ''', + [entryKey, '$entryKey-%', entryKey], +); + +Future>> _pathsQuery( + DatabaseExecutor connection, + String entryKey, +) => connection.rawQuery( + ''' + SELECT + "${KanjiVGTableNames.path}"."pathId", + "${KanjiVGTableNames.path}"."groupId", + "${KanjiVGTableNames.path}"."type", + "${KanjiVGTableNames.path}"."svgPath", + "${KanjiVGTableNames.strokeNumber}"."x", + "${KanjiVGTableNames.strokeNumber}"."y" + FROM "${KanjiVGTableNames.path}" + JOIN "${KanjiVGTableNames.strokeNumber}" + ON "${KanjiVGTableNames.path}"."character" = "${KanjiVGTableNames.strokeNumber}"."character" + AND "${KanjiVGTableNames.path}"."pathId" = "${KanjiVGTableNames.strokeNumber}"."strokeNum" + WHERE "${KanjiVGTableNames.path}"."character" = ? 
+ ORDER BY "${KanjiVGTableNames.path}"."pathId" + ''', + [entryKey], +); + +Future>> _pathGroupsQuery( + DatabaseExecutor connection, + String entryKey, +) => connection.query( + KanjiVGTableNames.pathGroup, + where: 'character = ?', + whereArgs: [entryKey], + orderBy: 'groupId', +); + +String _normalizeKanjiVGEntryKey(String kanji) { + final encodedMatch = RegExp(r'^([0-9a-fA-F]{5,6})(-.+)?$').firstMatch(kanji); + if (encodedMatch != null) { + return '${encodedMatch.group(1)!.toLowerCase()}${encodedMatch.group(2) ?? ''}'; + } + + final runes = kanji.runes.toList(growable: false); + if (runes.length == 1) { + return runes.single.toRadixString(16).padLeft(5, '0'); + } + + return kanji; +} + +String _characterFromEntryKey(String entryKey, String fallback) { + final encodedMatch = RegExp(r'^([0-9a-fA-F]{5,6})').firstMatch(entryKey); + if (encodedMatch == null) { + return fallback; + } + + return String.fromCharCode(int.parse(encodedMatch.group(1)!, radix: 16)); +} + +KanjiVGPath _pathFromRow(Map row) => KanjiVGPath( + pathId: row['pathId'] as int, + type: row['type'] as String?, + svgPath: row['svgPath'] as String, + labelX: (row['x'] as num).toDouble(), + labelY: (row['y'] as num).toDouble(), +); + +List _buildPathGroups( + List> pathRows, + List> pathGroupRows, +) { + final rowsByGroupId = >{ + for (final row in pathGroupRows) (row['groupId'] as int?)!: row, + }; + + final childGroupIdsByParentGroupId = >{}; + for (final row in pathGroupRows) { + final groupId = (row['groupId'] as int?)!; + final parentGroupId = row['parentGroupId'] as int?; + childGroupIdsByParentGroupId + .putIfAbsent(parentGroupId, () => []) + .add(groupId); + } + + final pathsByGroupId = >{}; + for (final row in pathRows) { + final groupId = (row['groupId'] as int?)!; + pathsByGroupId.putIfAbsent(groupId, () => []).add(_pathFromRow(row)); + } + + KanjiVGPathGroup buildGroup(int groupId) { + final row = rowsByGroupId[groupId]!; + + return KanjiVGPathGroup( + groupId: groupId, + paths: 
pathsByGroupId[groupId] ?? const [], + children: (childGroupIdsByParentGroupId[groupId] ?? const []) + .map(buildGroup) + .toList(growable: false), + element: row['element'] as String?, + original: row['original'] as String?, + position: KanjiVGPathGroupPosition.fromNullableString( + row['position'] as String?, + ), + radical: KanjiVGRadical.fromNullableString(row['radical'] as String?), + part: row['part'] as int?, + ); + } + + return (childGroupIdsByParentGroupId[null] ?? const []) + .map(buildGroup) + .toList(growable: false); +} + +/// Searches for a KanjiVG graph and returns its stroke data, or null if the +/// kanji is not found in the database. +Future searchKanjiVGGraphWithDbConnection( + DatabaseExecutor connection, + String kanji, { + bool includePathGroups = false, +}) async { + final entryKey = _normalizeKanjiVGEntryKey(kanji); + final entryRows = await _entryQuery(connection, entryKey); + + if (entryRows.isEmpty) { + return null; + } + + final matchedEntryKey = entryRows.first['character'] as String; + + late final List> pathRows; + List> pathGroupRows = const []; + + if (includePathGroups) { + await Future.wait([ + _pathsQuery( + connection, + matchedEntryKey, + ).then((value) => pathRows = value), + _pathGroupsQuery( + connection, + matchedEntryKey, + ).then((value) => pathGroupRows = value), + ]); + } else { + pathRows = await _pathsQuery(connection, matchedEntryKey); + } + + return KanjiVGEntry( + character: _characterFromEntryKey(matchedEntryKey, kanji), + paths: pathRows.map(_pathFromRow).toList(growable: false), + pathGroups: includePathGroups + ? 
_buildPathGroups(pathRows, pathGroupRows) + : null, + ); +} diff --git a/lib/table_names/kanjivg.dart b/lib/table_names/kanjivg.dart new file mode 100644 index 0000000..fbe355a --- /dev/null +++ b/lib/table_names/kanjivg.dart @@ -0,0 +1,15 @@ +abstract class KanjiVGTableNames { + static const String version = 'KanjiVG_Version'; + static const String entry = 'KanjiVG_Entry'; + static const String path = 'KanjiVG_Path'; + static const String strokeNumber = 'KanjiVG_StrokeNumber'; + static const String pathGroup = 'KanjiVG_PathGroup'; + + static Set get allTables => { + version, + entry, + path, + strokeNumber, + pathGroup, + }; +} diff --git a/lib/version.dart b/lib/version.dart index c7e1e60..7376881 100644 --- a/lib/version.dart +++ b/lib/version.dart @@ -1 +1 @@ -const int jadbSchemaVersion = 2; +const int jadbSchemaVersion = 3; diff --git a/migrations/0011_KanjiVG.sql b/migrations/0011_KanjiVG.sql new file mode 100644 index 0000000..f2cba52 --- /dev/null +++ b/migrations/0011_KanjiVG.sql @@ -0,0 +1,69 @@ +CREATE TABLE "KanjiVG_Version" ( + "version" VARCHAR(10) PRIMARY KEY NOT NULL, + "date" DATE NOT NULL, + "hash" VARCHAR(64) NOT NULL +) WITHOUT ROWID; + +CREATE TRIGGER "KanjiVG_Version_SingleRow" +BEFORE INSERT ON "KanjiVG_Version" +WHEN (SELECT COUNT(*) FROM "KanjiVG_Version") >= 1 +BEGIN + SELECT RAISE(FAIL, 'Only one row allowed in KanjiVG_Version'); +END; + +CREATE TABLE "KanjiVG_Entry" ( + "character" CHAR(1) PRIMARY KEY NOT NULL +) WITHOUT ROWID; + +CREATE TABLE "KanjiVG_StrokeNumber" ( + "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"), + "strokeNum" INTEGER NOT NULL, + "x" REAL NOT NULL, + "y" REAL NOT NULL, + PRIMARY KEY ("character", "strokeNum"), + FOREIGN KEY ("character", "strokeNum") REFERENCES "KanjiVG_Path"("character", "pathId") +) WITHOUT ROWID; + +CREATE TABLE "KanjiVG_Path" ( + "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"), + "pathId" INTEGER NOT NULL, + "groupId" INTEGER NOT NULL, + "type" 
VARCHAR(10),
+  "svgPath" TEXT NOT NULL,
+  PRIMARY KEY ("character", "pathId"),
+  FOREIGN KEY ("character", "groupId") REFERENCES "KanjiVG_PathGroup"("character", "groupId")
+) WITHOUT ROWID;
+
+CREATE TABLE "KanjiVG_PathGroup" (
+  "character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
+  "groupId" INTEGER NOT NULL,
+  "parentGroupId" INTEGER,
+  "element" TEXT,
+  "original" TEXT,
+  "position" VARCHAR(6),
+  "radical" TEXT,
+  "part" INTEGER,
+  PRIMARY KEY ("character", "groupId"),
+  CHECK (
+    "position" IN (
+      'upperA',
+      'upperB',
+      'lower1',
+      'lower2',
+      'bottom',
+      'kamae',
+      'kamaec',
+      'left',
+      'middle',
+      'nyo',
+      'nyoc',
+      'right',
+      'tare',
+      'tarec',
+      'top'
+    )
+    OR
+    "position" IS NULL
+  ),
+  FOREIGN KEY ("character", "parentGroupId") REFERENCES "KanjiVG_PathGroup"("character", "groupId")
+) WITHOUT ROWID;
diff --git a/nix/database.nix b/nix/database.nix
index 21439e1..4545a22 100644
--- a/nix/database.nix
+++ b/nix/database.nix
@@ -7,6 +7,7 @@
   radkfile,
   kanjidic2,
   tanos-jlpt,
+  kanjivg,
   sqlite,
   wal ? false,
 }:
@@ -39,6 +40,8 @@ stdenvNoCC.mkDerivation {
     TANOS_JLPT_VERSION = tanos-jlpt.version;
     TANOS_JLPT_DATE = tanos-jlpt.date;
     TANOS_JLPT_HASH = tanos-jlpt.hash;
+
+    KANJIVG_PATH = "${kanjivg}/kanji";
   };

   buildPhase = ''
diff --git a/test/models/kanjivg_test.dart b/test/models/kanjivg_test.dart
new file mode 100644
index 0000000..9c8309c
--- /dev/null
+++ b/test/models/kanjivg_test.dart
@@ -0,0 +1,203 @@
+import 'dart:convert';
+
+import 'package:jadb/models/kanjivg/kanjivg_entry.dart';
+import 'package:jadb/models/kanjivg/kanjivg_path.dart';
+import 'package:jadb/models/kanjivg/kanjivg_path_group.dart';
+import 'package:jadb/models/kanjivg/kanjivg_path_group_position.dart';
+import 'package:jadb/models/kanjivg/kanjivg_radical.dart';
+import 'package:test/test.dart';
+
+/// Encodes [value] to JSON text and decodes it again.
+Object? _roundTripJson(Object? value) => jsonDecode(jsonEncode(value));
+
+/// Round-trips [json] through JSON text and copies the result into a typed map.
+Map<String, Object?> _roundTripMap(Object? json) =>
+    Map<String, Object?>.from(_roundTripJson(json) as Map);
+
+void main() {
+  group('KanjiVG model serialization', () {
+    test('KanjiVGPathGroupPosition roundtrips all values', () {
+      for (final value in KanjiVGPathGroupPosition.values) {
+        expect(
+          KanjiVGPathGroupPosition.fromJson(_roundTripJson(value.toJson())),
+          equals(value),
+        );
+      }
+    });
+
+    test('KanjiVGPathGroupPosition parses SVG aliases', () {
+      expect(
+        KanjiVGPathGroupPosition.fromString('⿵A'),
+        equals(KanjiVGPathGroupPosition.upperA),
+      );
+      expect(
+        KanjiVGPathGroupPosition.fromString('⿵B'),
+        equals(KanjiVGPathGroupPosition.upperB),
+      );
+      expect(
+        KanjiVGPathGroupPosition.fromString('⿶1'),
+        equals(KanjiVGPathGroupPosition.lower1),
+      );
+      expect(
+        KanjiVGPathGroupPosition.fromString('⿶2'),
+        equals(KanjiVGPathGroupPosition.lower2),
+      );
+      expect(
+        KanjiVGPathGroupPosition.fromString('left'),
+        equals(KanjiVGPathGroupPosition.left),
+      );
+    });
+
+    test('KanjiVGRadical roundtrips all values', () {
+      for (final value in KanjiVGRadical.values) {
+        expect(
+          KanjiVGRadical.fromJson(_roundTripJson(value.toJson())),
+          equals(value),
+        );
+      }
+    });
+
+    test('KanjiVGPath roundtrips via JSON', () {
+      final path = KanjiVGPath(
+        pathId: 1,
+        type: '㇐',
+        svgPath: 'M12.5,18c2.1,0.4,6.1,0.6,8.1,0.4',
+        labelX: 12.5,
+        labelY: 18.0,
+      );
+
+      final restored = KanjiVGPath.fromJson(_roundTripMap(path.toJson()));
+
+      expect(restored, equals(path));
+    });
+
+    test('KanjiVGPath roundtrips required label coordinates', () {
+      final path = KanjiVGPath(
+        pathId: 2,
+        type: '㇒',
+        svgPath: 'M18,12c0.5,1,1,2,1.5,3',
+        labelX: 9.5,
+        labelY: 14.0,
+      );
+
+      final restored = KanjiVGPath.fromJson(_roundTripMap(path.toJson()));
+
+      expect(restored, equals(path));
+    });
+
+    test('KanjiVGPathGroup roundtrips nested tree data', () {
+      final group = KanjiVGPathGroup(
+        groupId: 0,
+        element: '休',
+        position: KanjiVGPathGroupPosition.left,
+        paths: [
+          KanjiVGPath(
+            pathId: 1,
+            type: '㇐',
+            svgPath: 'M10,10c1,0,2,0,3,0',
+            labelX:
7.0,
+            labelY: 9.0,
+          ),
+        ],
+        children: [
+          KanjiVGPathGroup(
+            groupId: 1,
+            element: '人',
+            radical: KanjiVGRadical.general,
+            part: 1,
+            paths: [
+              KanjiVGPath(
+                pathId: 2,
+                type: '㇒',
+                svgPath: 'M12,8c0.5,1,1,2,1.5,3',
+                labelX: 11.0,
+                labelY: 6.5,
+              ),
+            ],
+          ),
+          KanjiVGPathGroup(
+            groupId: 2,
+            element: '木',
+            original: '木',
+            position: KanjiVGPathGroupPosition.right,
+            paths: [
+              KanjiVGPath(
+                pathId: 3,
+                type: '㇑',
+                svgPath: 'M18,9c0,2,0,4,0,6',
+                labelX: 19.0,
+                labelY: 7.0,
+              ),
+            ],
+          ),
+        ],
+      );
+
+      final restored = KanjiVGPathGroup.fromJson(_roundTripMap(group.toJson()));
+
+      expect(restored, equals(group));
+    });
+
+    test('KanjiVGEntry roundtrips populated data', () {
+      final entry = KanjiVGEntry(
+        character: '休',
+        paths: [
+          KanjiVGPath(
+            pathId: 1,
+            type: '㇒',
+            svgPath: 'M18,12c0.5,1,1,2,1.5,3',
+            labelX: 12.0,
+            labelY: 10.0,
+          ),
+          KanjiVGPath(
+            pathId: 2,
+            type: '㇐',
+            svgPath: 'M30,24c2,0,6,0,8,0',
+            labelX: 28.0,
+            labelY: 21.0,
+          ),
+        ],
+        pathGroups: [
+          KanjiVGPathGroup(
+            groupId: 0,
+            element: '休',
+            children: [
+              KanjiVGPathGroup(
+                groupId: 1,
+                element: '人',
+                radical: KanjiVGRadical.general,
+                paths: [
+                  KanjiVGPath(
+                    pathId: 1,
+                    type: '㇒',
+                    svgPath: 'M18,12c0.5,1,1,2,1.5,3',
+                    labelX: 12.0,
+                    labelY: 10.0,
+                  ),
+                ],
+              ),
+              KanjiVGPathGroup(
+                groupId: 2,
+                element: '木',
+                position: KanjiVGPathGroupPosition.right,
+                paths: [
+                  KanjiVGPath(
+                    pathId: 2,
+                    type: '㇐',
+                    svgPath: 'M30,24c2,0,6,0,8,0',
+                    labelX: 28.0,
+                    labelY: 21.0,
+                  ),
+                ],
+              ),
+            ],
+          ),
+        ],
+      );
+
+      final restored = KanjiVGEntry.fromJson(_roundTripMap(entry.toJson()));
+
+      expect(restored, equals(entry));
+    });
+  });
+}
diff --git a/test/search/kanji_vg_search_test.dart b/test/search/kanji_vg_search_test.dart
new file mode 100644
index 0000000..6ea54d7
--- /dev/null
+++ b/test/search/kanji_vg_search_test.dart
@@ -0,0 +1,69 @@
+import 'package:jadb/models/kanjivg/kanjivg_path_group.dart';
+import 'package:jadb/search.dart';
+import 'package:test/test.dart';
+
+import 'setup_database_connection.dart';
+
+/// Yields each group in [groups] followed by all of its descendants,
+/// depth-first, so nested groups can be inspected as a flat sequence.
+Iterable<KanjiVGPathGroup> _flattenGroups(
+  Iterable<KanjiVGPathGroup> groups,
+) sync* {
+  for (final group in groups) {
+    yield group;
+    yield* _flattenGroups(group.children);
+  }
+}
+
+void main() {
+  group('KanjiVG search', () {
+    test('returns null when the entry does not exist', () async {
+      final connection = await setupDatabaseConnection();
+      addTearDown(() async => connection.close());
+
+      final result = await connection.jadbSearchKanjiVGGraph('notfound');
+
+      expect(result, isNull);
+    });
+
+    test('returns entry paths without path groups by default', () async {
+      final connection = await setupDatabaseConnection();
+      addTearDown(() async => connection.close());
+
+      final result = await connection.jadbSearchKanjiVGGraph('休');
+
+      expect(result, isNotNull);
+      expect(result!.character, equals('休'));
+      expect(result.paths, isNotEmpty);
+      expect(result.pathGroups, isNull);
+    });
+
+    test('returns the path-group graph when requested', () async {
+      final connection = await setupDatabaseConnection();
+      addTearDown(() async => connection.close());
+
+      final result = await connection.jadbSearchKanjiVGGraph(
+        '休',
+        includePathGroups: true,
+      );
+
+      expect(result, isNotNull);
+      expect(result!.pathGroups, isNotNull);
+      expect(result.pathGroups, isNotEmpty);
+
+      final allGroups = _flattenGroups(result.pathGroups!).toList();
+      final groupedPathIds =
+          allGroups
+              .expand((group) => group.paths)
+              .map((path) => path.pathId)
+              .toList()
+            ..sort();
+      final entryPathIds = result.paths.map((path) => path.pathId).toList()
+        ..sort();
+
+      expect(allGroups.any((group) => group.groupId == 0), isTrue);
+      expect(allGroups.any((group) => group.paths.isNotEmpty), isTrue);
+      expect(groupedPathIds, equals(entryPathIds));
+    });
+  });
+}