1 Commits

Author SHA1 Message Date
df8b204d3c WIP: search/word_search: do a bit of performance optimization on result regrouping
All checks were successful
Build and test / build (push) Successful in 11m39s
2026-03-04 19:01:16 +09:00
4 changed files with 129 additions and 99 deletions

12
flake.lock generated
View File

@@ -3,7 +3,7 @@
"jmdict-src": {
"flake": false,
"locked": {
"narHash": "sha256-eOc3a/AYNRFF3w6lWhyf0Sh92xeXS7+9Qvn0tvvH6Ys=",
"narHash": "sha256-lh46uougUzBrRhhwa7cOb32j5Jt9/RjBUhlVjwVzsII=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
},
@@ -15,7 +15,7 @@
"jmdict-with-examples-src": {
"flake": false,
"locked": {
"narHash": "sha256-nx+WMkscWvA/XImKM7NESYVmICwSgXWOO1KPXasHY94=",
"narHash": "sha256-5oS2xDyetbuSM6ax3LUjYA3N60x+D3Hg41HEXGFMqLQ=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
},
@@ -27,7 +27,7 @@
"kanjidic2-src": {
"flake": false,
"locked": {
"narHash": "sha256-2T/cAS/kZmVMURStgHVhz524+J9+v5onKs8eEYf2fY0=",
"narHash": "sha256-orSeQqSxhn9TtX3anYtbiMEm7nFkuomGnIKoVIUR2CM=",
"type": "file",
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
},
@@ -38,11 +38,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1774386573,
"narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=",
"lastModified": 1771848320,
"narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9",
"rev": "2fc6539b481e1d2569f25f8799236694180c0993",
"type": "github"
},
"original": {

View File

@@ -21,49 +21,83 @@ List<WordSearchResult> regroupWordSearchResults({
}) {
final List<WordSearchResult> results = [];
final commonEntryIds = linearWordQueryData.commonEntries
final Set<int> commonEntryIds = linearWordQueryData.commonEntries
.map((entry) => entry['entryId'] as int)
.toSet();
final Map<int, List<Map<String, Object?>>> entryReadingElementsByEntryId =
linearWordQueryData.readingElements.groupListsBy(
(element) => element['entryId'] as int,
);
final Map<int, List<Map<String, Object?>>> entryKanjiElementsByEntryId =
linearWordQueryData.kanjiElements.groupListsBy(
(element) => element['entryId'] as int,
);
final Map<int, int> elementIdToEntryId = {
for (final element in linearWordQueryData.readingElements)
element['elementId'] as int: element['entryId'] as int,
for (final element in linearWordQueryData.kanjiElements)
element['elementId'] as int: element['entryId'] as int,
};
final Map<int, List<Map<String, Object?>>> entryReadingElementInfosByEntryId =
linearWordQueryData.readingElementInfos.groupListsBy(
(element) => elementIdToEntryId[element['elementId'] as int]!,
);
final Map<int, List<Map<String, Object?>>> entryKanjiElementInfosByEntryId =
linearWordQueryData.kanjiElementInfos.groupListsBy(
(element) => elementIdToEntryId[element['elementId'] as int]!,
);
final Map<int, List<Map<String, Object?>>>
entryReadingElementRestrictionsByEntryId = linearWordQueryData
.readingElementRestrictions
.groupListsBy(
(element) => elementIdToEntryId[element['elementId'] as int]!,
);
final Map<int, JlptLevel> entryJlptTagsByEntryId = linearWordQueryData
.jlptTags
.groupSetsBy((element) => element['entryId'] as int)
.map(
(final key, final value) => MapEntry(
key,
value.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)).min,
),
);
final Map<int, List<Map<String, Object?>>> entrySensesByEntryId =
linearWordQueryData.senses.groupListsBy(
(element) => element['entryId'] as int,
);
for (final scoredEntryId in entryIds) {
final List<Map<String, Object?>> entryReadingElements = linearWordQueryData
.readingElements
.where((element) => element['entryId'] == scoredEntryId.entryId)
.toList();
final List<Map<String, Object?>> entryKanjiElements = linearWordQueryData
.kanjiElements
.where((element) => element['entryId'] == scoredEntryId.entryId)
.toList();
final List<Map<String, Object?>> entryJlptTags = linearWordQueryData
.jlptTags
.where((element) => element['entryId'] == scoredEntryId.entryId)
.toList();
final jlptLevel =
entryJlptTags
.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
.sorted((a, b) => b.compareTo(a))
.firstOrNull ??
JlptLevel.none;
final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
final List<Map<String, Object?>> entrySenses = linearWordQueryData.senses
.where((element) => element['entryId'] == scoredEntryId.entryId)
.toList();
final List<Map<String, Object?>> entryReadingElements =
entryReadingElementsByEntryId[scoredEntryId.entryId] ?? const [];
final List<Map<String, Object?>> entryKanjiElements =
entryKanjiElementsByEntryId[scoredEntryId.entryId] ?? const [];
final List<Map<String, Object?>> entryReadingElementInfos =
entryReadingElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
final List<Map<String, Object?>> entryKanjiElementInfos =
entryKanjiElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
final List<Map<String, Object?>> entryReadingElementRestrictions =
entryReadingElementRestrictionsByEntryId[scoredEntryId.entryId] ??
const [];
final GroupedWordResult entryReadingElementsGrouped = _regroupWords(
entryId: scoredEntryId.entryId,
readingElements: entryReadingElements,
kanjiElements: entryKanjiElements,
readingElementInfos: linearWordQueryData.readingElementInfos,
readingElementRestrictions:
linearWordQueryData.readingElementRestrictions,
kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
kanjiElementInfos: entryKanjiElementInfos,
readingElements: entryReadingElements,
readingElementInfos: entryReadingElementInfos,
readingElementRestrictions: entryReadingElementRestrictions,
);
final List<Map<String, Object?>> entrySenses =
entrySensesByEntryId[scoredEntryId.entryId] ?? const [];
final List<WordSearchSense> entrySensesGrouped = _regroupSenses(
senses: entrySenses,
senseAntonyms: linearWordQueryData.senseAntonyms,
@@ -82,6 +116,10 @@ List<WordSearchResult> regroupWordSearchResults({
senseAntonymsXrefData: linearWordQueryData.senseAntonymData,
);
final bool isCommon = commonEntryIds.contains(scoredEntryId.entryId);
final JlptLevel jlptLevel =
entryJlptTagsByEntryId[scoredEntryId.entryId] ?? JlptLevel.none;
results.add(
WordSearchResult(
score: scoredEntryId.score,
@@ -113,7 +151,6 @@ class GroupedWordResult {
}
GroupedWordResult _regroupWords({
required int entryId,
required List<Map<String, Object?>> kanjiElements,
required List<Map<String, Object?>> kanjiElementInfos,
required List<Map<String, Object?>> readingElements,
@@ -122,36 +159,34 @@ GroupedWordResult _regroupWords({
}) {
final List<WordSearchRuby> rubys = [];
final kanjiElements_ = kanjiElements
.where((element) => element['entryId'] == entryId)
.toList();
final Map<int, Set<String>> readingElementRestrictionsSet =
readingElementRestrictions
.groupSetsBy((element) => element['elementId'] as int)
.map(
(key, value) => MapEntry(
key,
value.map((e) => e['restriction'] as String).toSet(),
),
);
final readingElements_ = readingElements
.where((element) => element['entryId'] == entryId)
.toList();
final readingElementRestrictions_ = readingElementRestrictions
.where((element) => element['entryId'] == entryId)
.toList();
for (final readingElement in readingElements_) {
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
// Construct a cartesian product of kanji + readings, with exceptions made for items marked in `restrictions`.
for (final readingElement in readingElements) {
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements.isEmpty) {
final ruby = WordSearchRuby(base: readingElement['reading'] as String);
rubys.add(ruby);
continue;
}
for (final kanjiElement in kanjiElements_) {
for (final kanjiElement in kanjiElements) {
final kanji = kanjiElement['reading'] as String;
final reading = readingElement['reading'] as String;
final restrictions = readingElementRestrictions_
.where((element) => element['reading'] == reading)
.toList();
if (restrictions.isNotEmpty &&
!restrictions.any((element) => element['restriction'] == kanji)) {
// The 'restrictions' act as an allowlist, meaning that non-matching kanji elements should be ignored.
final restrictions =
readingElementRestrictionsSet[readingElement['elementId'] as int] ??
{};
if (restrictions.isNotEmpty && !restrictions.contains(kanji)) {
continue;
}
@@ -160,35 +195,30 @@ GroupedWordResult _regroupWords({
}
}
assert(rubys.isNotEmpty, 'No readings found for entryId: $entryId');
assert(
rubys.isNotEmpty,
'No readings found for entryId: ${kanjiElements.firstOrNull?['entryId'] ?? readingElements.firstOrNull?['entryId'] ?? '???'}',
);
final Map<int, String> readingElementIdsToReading = {
for (final element in readingElements_)
for (final element in readingElements)
element['elementId'] as int: element['reading'] as String,
};
final Map<int, String> kanjiElementIdsToReading = {
for (final element in kanjiElements_)
for (final element in kanjiElements)
element['elementId'] as int: element['reading'] as String,
};
final readingElementInfos_ = readingElementInfos
.where((element) => element['entryId'] == entryId)
.toList();
final kanjiElementInfos_ = kanjiElementInfos
.where((element) => element['entryId'] == entryId)
.toList();
return GroupedWordResult(
rubys: rubys,
readingInfos: {
for (final rei in readingElementInfos_)
for (final rei in readingElementInfos)
readingElementIdsToReading[rei['elementId'] as int]!:
JMdictReadingInfo.fromId(rei['info'] as String),
},
kanjiInfos: {
for (final kei in kanjiElementInfos_)
for (final kei in kanjiElementInfos)
kanjiElementIdsToReading[kei['elementId'] as int]!:
JMdictKanjiInfo.fromId(kei['info'] as String),
},

View File

@@ -5,18 +5,18 @@ packages:
dependency: transitive
description:
name: _fe_analyzer_shared
sha256: "8d718c5c58904f9937290fd5dbf2d6a0e02456867706bfb6cd7b81d394e738d5"
sha256: "3b19a47f6ea7c2632760777c78174f47f6aec1e05f0cd611380d4593b8af1dbc"
url: "https://pub.dev"
source: hosted
version: "98.0.0"
version: "96.0.0"
analyzer:
dependency: transitive
description:
name: analyzer
sha256: "6141ad5d092d1e1d13929c0504658bbeccc1703505830d7c26e859908f5efc88"
sha256: "0c516bc4ad36a1a75759e54d5047cb9d15cded4459df01aa35a0b5ec7db2c2a0"
url: "https://pub.dev"
source: hosted
version: "12.0.0"
version: "10.2.0"
args:
dependency: "direct main"
description:
@@ -29,10 +29,10 @@ packages:
dependency: transitive
description:
name: async
sha256: e2eb0491ba5ddb6177742d2da23904574082139b07c1e33b8503b9f46f3e1a37
sha256: "758e6d74e971c3e5aceb4110bfd6698efc7f501675bcfe0c775459a8140750eb"
url: "https://pub.dev"
source: hosted
version: "2.13.1"
version: "2.13.0"
benchmark_harness:
dependency: "direct dev"
description:
@@ -101,10 +101,10 @@ packages:
dependency: "direct main"
description:
name: csv
sha256: "2e0a52fb729f2faacd19c9c0c954ff450bba37aa8ab999410309e2342e7013a2"
sha256: bef2950f7a753eb82f894a2eabc3072e73cf21c17096296a5a992797e50b1d0d
url: "https://pub.dev"
source: hosted
version: "8.0.0"
version: "7.1.0"
equatable:
dependency: "direct main"
description:
@@ -149,10 +149,10 @@ packages:
dependency: transitive
description:
name: hooks
sha256: e79ed1e8e1929bc6ecb6ec85f0cb519c887aa5b423705ded0d0f2d9226def388
sha256: "7a08a0d684cb3b8fb604b78455d5d352f502b68079f7b80b831c62220ab0a4f6"
url: "https://pub.dev"
source: hosted
version: "1.0.2"
version: "1.0.1"
http_multi_server:
dependency: transitive
description:
@@ -197,18 +197,18 @@ packages:
dependency: transitive
description:
name: matcher
sha256: dc0b7dc7651697ea4ff3e69ef44b0407ea32c487a39fff6a4004fa585e901861
sha256: "12956d0ad8390bbcc63ca2e1469c0619946ccb52809807067a7020d57e647aa6"
url: "https://pub.dev"
source: hosted
version: "0.12.19"
version: "0.12.18"
meta:
dependency: transitive
description:
name: meta
sha256: df0c643f44ad098eb37988027a8e2b2b5a031fd3977f06bbfd3a76637e8df739
sha256: "9f29b9bcc8ee287b1a31e0d01be0eae99a930dbffdaecf04b3f3d82a969f296f"
url: "https://pub.dev"
source: hosted
version: "1.18.2"
version: "1.18.1"
mime:
dependency: transitive
description:
@@ -221,10 +221,10 @@ packages:
dependency: transitive
description:
name: native_toolchain_c
sha256: "6ba77bb18063eebe9de401f5e6437e95e1438af0a87a3a39084fbd37c90df572"
sha256: "89e83885ba09da5fdf2cdacc8002a712ca238c28b7f717910b34bcd27b0d03ac"
url: "https://pub.dev"
source: hosted
version: "0.17.6"
version: "0.17.4"
node_preamble:
dependency: transitive
description:
@@ -349,10 +349,10 @@ packages:
dependency: "direct main"
description:
name: sqlite3
sha256: caa693ad15a587a2b4fde093b728131a1827903872171089dedb16f7665d3a91
sha256: b7cf6b37667f6a921281797d2499ffc60fb878b161058d422064f0ddc78f6aa6
url: "https://pub.dev"
source: hosted
version: "3.2.0"
version: "3.1.6"
stack_trace:
dependency: transitive
description:
@@ -397,26 +397,26 @@ packages:
dependency: "direct dev"
description:
name: test
sha256: "8d9ceddbab833f180fbefed08afa76d7c03513dfdba87ffcec2718b02bbcbf20"
sha256: "54c516bbb7cee2754d327ad4fca637f78abfc3cbcc5ace83b3eda117e42cd71a"
url: "https://pub.dev"
source: hosted
version: "1.31.0"
version: "1.29.0"
test_api:
dependency: transitive
description:
name: test_api
sha256: "949a932224383300f01be9221c39180316445ecb8e7547f70a41a35bf421fb9e"
sha256: "93167629bfc610f71560ab9312acdda4959de4df6fac7492c89ff0d3886f6636"
url: "https://pub.dev"
source: hosted
version: "0.7.11"
version: "0.7.9"
test_core:
dependency: transitive
description:
name: test_core
sha256: "1991d4cfe85d5043241acac92962c3977c8d2f2add1ee73130c7b286417d1d34"
sha256: "394f07d21f0f2255ec9e3989f21e54d3c7dc0e6e9dbce160e5a9c1a6be0e2943"
url: "https://pub.dev"
source: hosted
version: "0.6.17"
version: "0.6.15"
typed_data:
dependency: transitive
description:

View File

@@ -9,7 +9,7 @@ environment:
dependencies:
args: ^2.7.0
collection: ^1.19.0
csv: ^8.0.0
csv: ^7.1.0
equatable: ^2.0.0
path: ^1.9.1
sqflite_common: ^2.5.0