Compare commits
1 Commits
add-kanjiv
...
word-regro
| Author | SHA1 | Date | |
|---|---|---|---|
|
52e9954c71
|
17
flake.lock
generated
17
flake.lock
generated
@@ -36,22 +36,6 @@
|
||||
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
|
||||
}
|
||||
},
|
||||
"kanjivg-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1772352482,
|
||||
"narHash": "sha256-8EG3Y1daI2B24NELQwU+eXl/7OmWnW/RXMAQSRVLzWw=",
|
||||
"ref": "refs/heads/master",
|
||||
"rev": "0b4309cf6d74799b0e4b72940d8267fbe73f72d0",
|
||||
"revCount": 2212,
|
||||
"type": "git",
|
||||
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
|
||||
},
|
||||
"original": {
|
||||
"type": "git",
|
||||
"url": "https://git.pvv.ntnu.no/mugiten/kanjivg.git"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1771848320,
|
||||
@@ -84,7 +68,6 @@
|
||||
"jmdict-src": "jmdict-src",
|
||||
"jmdict-with-examples-src": "jmdict-with-examples-src",
|
||||
"kanjidic2-src": "kanjidic2-src",
|
||||
"kanjivg-src": "kanjivg-src",
|
||||
"nixpkgs": "nixpkgs",
|
||||
"radkfile-src": "radkfile-src"
|
||||
}
|
||||
|
||||
@@ -24,11 +24,6 @@
|
||||
url = "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz";
|
||||
flake = false;
|
||||
};
|
||||
|
||||
kanjivg-src = {
|
||||
url = "git+https://git.pvv.ntnu.no/mugiten/kanjivg.git";
|
||||
flake = false;
|
||||
};
|
||||
};
|
||||
|
||||
outputs = {
|
||||
@@ -37,8 +32,7 @@
|
||||
jmdict-src,
|
||||
jmdict-with-examples-src,
|
||||
radkfile-src,
|
||||
kanjidic2-src,
|
||||
kanjivg-src,
|
||||
kanjidic2-src
|
||||
}: let
|
||||
inherit (nixpkgs) lib;
|
||||
systems = [
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
import 'package:jadb/_data_ingestion/sql_writable.dart';
|
||||
|
||||
/// Values of the `kvg:position` attribute used by `<g>` elements in the KanjiVG SVG files.
///
/// The value names double as the stored representation:
/// `KanjiPathGroupTreeNode.sqlValue` writes `position?.name`, so renaming a
/// value changes the string that ends up in the database.
///
/// NOTE(review): the names match the list in the `CHECK ("position" IN (...))`
/// constraint of the "KanjiVG_PathGroup" table; keep both lists in sync.
|
||||
enum KanjiPathGroupPosition {
|
||||
bottom,
|
||||
kamae,
|
||||
kamaec,
|
||||
left,
|
||||
middle,
|
||||
nyo,
|
||||
nyoc,
|
||||
right,
|
||||
tare,
|
||||
tarec,
|
||||
top,
|
||||
}
|
||||
|
||||
/// Contents of a `<g>` element in the KanjiVG SVG files.
///
/// Nodes form a tree through [children]. Only [id] is required; every other
/// attribute is nullable and [children] defaults to an empty list.
|
||||
class KanjiPathGroupTreeNode extends SQLWritable {
|
||||
final String id;
|
||||
final List<KanjiPathGroupTreeNode> children;
|
||||
final String? element;
|
||||
final String? original;
|
||||
final KanjiPathGroupPosition? position;
|
||||
final String? radical;
|
||||
final int? part;
|
||||
|
||||
KanjiPathGroupTreeNode({
|
||||
required this.id,
|
||||
this.children = const [],
|
||||
this.element,
|
||||
this.original,
|
||||
this.position,
|
||||
this.radical,
|
||||
this.part,
|
||||
});
|
||||
|
||||
/// Flat map of this node's own attributes; [children] is not included.
///
/// [position] is written as the enum value's name, or `null` when unset.
///
/// NOTE(review): the key is `id`, while the "KanjiVG_PathGroup" table names
/// its columns "groupId", "parentGroupId" and "character"; none of those
/// three appear in this map. Confirm how the writer is expected to map them.
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
'id': id,
|
||||
'element': element,
|
||||
'original': original,
|
||||
'position': position?.name,
|
||||
'radical': radical,
|
||||
'part': part,
|
||||
};
|
||||
}
|
||||
|
||||
/// Contents of a `<text>` element in the stroke-number group of a KanjiVG SVG file.
///
/// Holds the stroke number [num] together with its [x] and [y] coordinates.
|
||||
class KanjiStrokeNumber extends SQLWritable {
|
||||
final int num;
|
||||
final double x;
|
||||
final double y;
|
||||
|
||||
KanjiStrokeNumber(this.num, this.x, this.y);
|
||||
|
||||
/// Map with the keys `num`, `x` and `y`.
///
/// NOTE(review): the "KanjiVG_StrokeNumber" table calls the number column
/// "strokeNum" and also requires "character"; neither key is produced here.
@override
|
||||
Map<String, Object?> get sqlValue => {'num': num, 'x': x, 'y': y};
|
||||
}
|
||||
|
||||
/// Contents of a `<path>` element in the KanjiVG SVG files.
///
/// All three fields are required: the path's [id], its [type] and the raw
/// path data in [svgPath].
|
||||
class KanjiVGPath extends SQLWritable {
|
||||
final String id;
|
||||
final String type;
|
||||
final String svgPath;
|
||||
|
||||
KanjiVGPath({required this.id, required this.type, required this.svgPath});
|
||||
|
||||
/// Map with the keys `id`, `type` and `svgPath`.
///
/// NOTE(review): the "KanjiVG_Path" table names the identifier column
/// "pathId" and also requires "character"; neither key is produced here.
@override
|
||||
Map<String, Object?> get sqlValue => {
|
||||
'id': id,
|
||||
'type': type,
|
||||
'svgPath': svgPath,
|
||||
};
|
||||
}
|
||||
|
||||
/// Everything collected for a single character: its [paths], [strokeNumbers]
/// and [pathGroups].
class KanjiVGItem extends SQLWritable {
|
||||
final String character;
|
||||
final List<KanjiVGPath> paths;
|
||||
final List<KanjiStrokeNumber> strokeNumbers;
|
||||
final List<KanjiPathGroupTreeNode> pathGroups;
|
||||
|
||||
KanjiVGItem({
|
||||
required this.character,
|
||||
required this.paths,
|
||||
required this.strokeNumbers,
|
||||
required this.pathGroups,
|
||||
});
|
||||
|
||||
/// Only [character] is emitted; the nested lists are not part of this map
/// and have to be written through their own `sqlValue` getters.
@override
|
||||
Map<String, Object?> get sqlValue => {'character': character};
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
/// Seeds KanjiVG data into [db].
///
/// TODO: this is a stub. [xmlContents] is never read, and the batch is
/// committed without any statement having been queued on it.
Future<void> seedKanjiVGData(Iterable<String> xmlContents, Database db) async {
|
||||
final b = db.batch();
|
||||
|
||||
// Nothing has been added to the batch at this point.
await b.commit(noResult: true);
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:jadb/table_names/kanjivg.dart';
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:jadb/table_names/tanos_jlpt.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
@@ -22,7 +21,6 @@ Future<void> verifyTablesWithDbConnection(DatabaseExecutor db) async {
|
||||
...KANJIDICTableNames.allTables,
|
||||
...RADKFILETableNames.allTables,
|
||||
...TanosJLPTTableNames.allTables,
|
||||
...KanjiVGTableNames.allTables,
|
||||
};
|
||||
|
||||
final missingTables = expectedTables.difference(tables);
|
||||
|
||||
@@ -21,49 +21,83 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
}) {
|
||||
final List<WordSearchResult> results = [];
|
||||
|
||||
final commonEntryIds = linearWordQueryData.commonEntries
|
||||
final Set<int> commonEntryIds = linearWordQueryData.commonEntries
|
||||
.map((entry) => entry['entryId'] as int)
|
||||
.toSet();
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryReadingElementsByEntryId =
|
||||
linearWordQueryData.readingElements.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryKanjiElementsByEntryId =
|
||||
linearWordQueryData.kanjiElements.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
final Map<int, int> elementIdToEntryId = {
|
||||
for (final element in linearWordQueryData.readingElements)
|
||||
element['elementId'] as int: element['entryId'] as int,
|
||||
for (final element in linearWordQueryData.kanjiElements)
|
||||
element['elementId'] as int: element['entryId'] as int,
|
||||
};
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryReadingElementInfosByEntryId =
|
||||
linearWordQueryData.readingElementInfos.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryKanjiElementInfosByEntryId =
|
||||
linearWordQueryData.kanjiElementInfos.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>>
|
||||
entryReadingElementRestrictionsByEntryId = linearWordQueryData
|
||||
.readingElementRestrictions
|
||||
.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, JlptLevel> entryJlptTagsByEntryId = linearWordQueryData
|
||||
.jlptTags
|
||||
.groupSetsBy((element) => element['entryId'] as int)
|
||||
.map(
|
||||
(final key, final value) => MapEntry(
|
||||
key,
|
||||
value.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)).min,
|
||||
),
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entrySensesByEntryId =
|
||||
linearWordQueryData.senses.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
for (final scoredEntryId in entryIds) {
|
||||
final List<Map<String, Object?>> entryReadingElements = linearWordQueryData
|
||||
.readingElements
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryKanjiElements = linearWordQueryData
|
||||
.kanjiElements
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryJlptTags = linearWordQueryData
|
||||
.jlptTags
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final jlptLevel =
|
||||
entryJlptTags
|
||||
.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
|
||||
.sorted((a, b) => b.compareTo(a))
|
||||
.firstOrNull ??
|
||||
JlptLevel.none;
|
||||
|
||||
final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||
|
||||
final List<Map<String, Object?>> entrySenses = linearWordQueryData.senses
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
final List<Map<String, Object?>> entryReadingElements =
|
||||
entryReadingElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryKanjiElements =
|
||||
entryKanjiElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryReadingElementInfos =
|
||||
entryReadingElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryKanjiElementInfos =
|
||||
entryKanjiElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryReadingElementRestrictions =
|
||||
entryReadingElementRestrictionsByEntryId[scoredEntryId.entryId] ??
|
||||
const [];
|
||||
|
||||
final GroupedWordResult entryReadingElementsGrouped = _regroupWords(
|
||||
entryId: scoredEntryId.entryId,
|
||||
readingElements: entryReadingElements,
|
||||
kanjiElements: entryKanjiElements,
|
||||
readingElementInfos: linearWordQueryData.readingElementInfos,
|
||||
readingElementRestrictions:
|
||||
linearWordQueryData.readingElementRestrictions,
|
||||
kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
|
||||
kanjiElementInfos: entryKanjiElementInfos,
|
||||
readingElements: entryReadingElements,
|
||||
readingElementInfos: entryReadingElementInfos,
|
||||
readingElementRestrictions: entryReadingElementRestrictions,
|
||||
);
|
||||
|
||||
final List<Map<String, Object?>> entrySenses =
|
||||
entrySensesByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
|
||||
final List<WordSearchSense> entrySensesGrouped = _regroupSenses(
|
||||
senses: entrySenses,
|
||||
senseAntonyms: linearWordQueryData.senseAntonyms,
|
||||
@@ -82,6 +116,10 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
senseAntonymsXrefData: linearWordQueryData.senseAntonymData,
|
||||
);
|
||||
|
||||
final bool isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||
final JlptLevel jlptLevel =
|
||||
entryJlptTagsByEntryId[scoredEntryId.entryId] ?? JlptLevel.none;
|
||||
|
||||
results.add(
|
||||
WordSearchResult(
|
||||
score: scoredEntryId.score,
|
||||
@@ -113,7 +151,6 @@ class GroupedWordResult {
|
||||
}
|
||||
|
||||
GroupedWordResult _regroupWords({
|
||||
required int entryId,
|
||||
required List<Map<String, Object?>> kanjiElements,
|
||||
required List<Map<String, Object?>> kanjiElementInfos,
|
||||
required List<Map<String, Object?>> readingElements,
|
||||
@@ -122,36 +159,34 @@ GroupedWordResult _regroupWords({
|
||||
}) {
|
||||
final List<WordSearchRuby> rubys = [];
|
||||
|
||||
final kanjiElements_ = kanjiElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
final Map<int, Set<String>> readingElementRestrictionsSet =
|
||||
readingElementRestrictions
|
||||
.groupSetsBy((element) => element['elementId'] as int)
|
||||
.map(
|
||||
(key, value) => MapEntry(
|
||||
key,
|
||||
value.map((e) => e['restriction'] as String).toSet(),
|
||||
),
|
||||
);
|
||||
|
||||
final readingElements_ = readingElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final readingElementRestrictions_ = readingElementRestrictions
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
for (final readingElement in readingElements_) {
|
||||
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
|
||||
// Construct a cartesian product of kanji + readings, with exceptions made for items marked in `restrictions`.
|
||||
for (final readingElement in readingElements) {
|
||||
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements.isEmpty) {
|
||||
final ruby = WordSearchRuby(base: readingElement['reading'] as String);
|
||||
rubys.add(ruby);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
for (final kanjiElement in kanjiElements_) {
|
||||
for (final kanjiElement in kanjiElements) {
|
||||
final kanji = kanjiElement['reading'] as String;
|
||||
final reading = readingElement['reading'] as String;
|
||||
|
||||
final restrictions = readingElementRestrictions_
|
||||
.where((element) => element['reading'] == reading)
|
||||
.toList();
|
||||
|
||||
if (restrictions.isNotEmpty &&
|
||||
!restrictions.any((element) => element['restriction'] == kanji)) {
|
||||
// The 'restrictions' act as an allowlist, meaning that non-matching kanji elements should be ignored.
|
||||
final restrictions =
|
||||
readingElementRestrictionsSet[readingElement['elementId'] as int] ??
|
||||
{};
|
||||
if (restrictions.isNotEmpty && !restrictions.contains(kanji)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -160,35 +195,30 @@ GroupedWordResult _regroupWords({
|
||||
}
|
||||
}
|
||||
|
||||
assert(rubys.isNotEmpty, 'No readings found for entryId: $entryId');
|
||||
assert(
|
||||
rubys.isNotEmpty,
|
||||
'No readings found for entryId: ${kanjiElements.firstOrNull?['entryId'] ?? readingElements.firstOrNull?['entryId'] ?? '???'}',
|
||||
);
|
||||
|
||||
final Map<int, String> readingElementIdsToReading = {
|
||||
for (final element in readingElements_)
|
||||
for (final element in readingElements)
|
||||
element['elementId'] as int: element['reading'] as String,
|
||||
};
|
||||
|
||||
final Map<int, String> kanjiElementIdsToReading = {
|
||||
for (final element in kanjiElements_)
|
||||
for (final element in kanjiElements)
|
||||
element['elementId'] as int: element['reading'] as String,
|
||||
};
|
||||
|
||||
final readingElementInfos_ = readingElementInfos
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final kanjiElementInfos_ = kanjiElementInfos
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
return GroupedWordResult(
|
||||
rubys: rubys,
|
||||
readingInfos: {
|
||||
for (final rei in readingElementInfos_)
|
||||
for (final rei in readingElementInfos)
|
||||
readingElementIdsToReading[rei['elementId'] as int]!:
|
||||
JMdictReadingInfo.fromId(rei['info'] as String),
|
||||
},
|
||||
kanjiInfos: {
|
||||
for (final kei in kanjiElementInfos_)
|
||||
for (final kei in kanjiElementInfos)
|
||||
kanjiElementIdsToReading[kei['elementId'] as int]!:
|
||||
JMdictKanjiInfo.fromId(kei['info'] as String),
|
||||
},
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
/// Names of the KanjiVG SQL tables, as string constants.
///
/// The values match the table names used by the `CREATE TABLE "KanjiVG_*"`
/// statements of the migration.
abstract class KanjiVGTableNames {
|
||||
static const String version = 'KanjiVG_Version';
|
||||
static const String entry = 'KanjiVG_Entry';
|
||||
static const String path = 'KanjiVG_Path';
|
||||
static const String strokeNumber = 'KanjiVG_StrokeNumber';
|
||||
static const String pathGroup = 'KanjiVG_PathGroup';
|
||||
|
||||
/// All five table names as a set.
static Set<String> get allTables => {version, entry, path, strokeNumber, pathGroup};
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
-- Version metadata: a version string (primary key), a date and a hash.
-- NOTE(review): presumably describes the KanjiVG data that was imported; confirm.
CREATE TABLE "KanjiVG_Version" (
|
||||
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||
"date" DATE NOT NULL,
|
||||
"hash" VARCHAR(64) NOT NULL
|
||||
) WITHOUT ROWID;
|
||||
|
||||
-- Rejects an INSERT when the table already holds a row, so the table can
-- contain at most one row.
CREATE TRIGGER "KanjiVG_Version_SingleRow"
|
||||
BEFORE INSERT ON "KanjiVG_Version"
|
||||
WHEN (SELECT COUNT(*) FROM "KanjiVG_Version") >= 1
|
||||
BEGIN
|
||||
SELECT RAISE(FAIL, 'Only one row allowed in KanjiVG_Version');
|
||||
END;
|
||||
|
||||
-- One row per character; the other KanjiVG_* tables reference this key.
CREATE TABLE "KanjiVG_Entry" (
|
||||
"character" CHAR(1) PRIMARY KEY NOT NULL
|
||||
) WITHOUT ROWID;
|
||||
|
||||
-- Stroke numbers of a character with their x/y coordinates,
-- keyed by (character, strokeNum).
CREATE TABLE "KanjiVG_StrokeNumber" (
|
||||
"character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
|
||||
"strokeNum" INTEGER NOT NULL,
|
||||
"x" REAL NOT NULL,
|
||||
"y" REAL NOT NULL,
|
||||
PRIMARY KEY ("character", "strokeNum")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
-- Paths of a character: identifier, type and SVG path data,
-- keyed by (character, pathId).
CREATE TABLE "KanjiVG_Path" (
|
||||
"character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
|
||||
"pathId" TEXT NOT NULL,
|
||||
"type" VARCHAR(10) NOT NULL,
|
||||
"svgPath" TEXT NOT NULL,
|
||||
PRIMARY KEY ("character", "pathId")
|
||||
) WITHOUT ROWID;
|
||||
|
||||
-- Path groups of a character, keyed by (character, groupId).
-- "parentGroupId" is NULL for a group without a parent; otherwise it names
-- another group of the same character.
CREATE TABLE "KanjiVG_PathGroup" (
|
||||
"character" CHAR(1) NOT NULL REFERENCES "KanjiVG_Entry"("character"),
|
||||
"groupId" TEXT NOT NULL,
|
||||
"parentGroupId" TEXT,
|
||||
"element" TEXT,
|
||||
"original" TEXT,
|
||||
"position" VARCHAR(10),
|
||||
"radical" TEXT,
|
||||
"part" INTEGER,
|
||||
PRIMARY KEY ("character", "groupId"),
|
||||
-- The parent key must be covered by a PRIMARY KEY or UNIQUE constraint.
-- "groupId" alone is not, so the self-reference uses the full composite key.
FOREIGN KEY ("character", "parentGroupId") REFERENCES "KanjiVG_PathGroup"("character", "groupId"),
|
||||
CHECK ("position" IN ('bottom', 'kamae', 'kamaec', 'left', 'middle', 'nyo', 'nyoc', 'right', 'tare', 'tarec', 'top') OR "position" IS NULL)
|
||||
) WITHOUT ROWID;
|
||||
Reference in New Issue
Block a user