From 52e9954c714121cd35a4f43aa1b3154359ab1fb8 Mon Sep 17 00:00:00 2001 From: h7x4 Date: Wed, 4 Mar 2026 17:49:49 +0900 Subject: [PATCH] WIP: search/word_search: do a bit of performance optimization on result regrouping --- lib/search/word_search/regrouping.dart | 166 +++++++++++++++---------- 1 file changed, 98 insertions(+), 68 deletions(-) diff --git a/lib/search/word_search/regrouping.dart b/lib/search/word_search/regrouping.dart index 7f80b17..2434d7c 100644 --- a/lib/search/word_search/regrouping.dart +++ b/lib/search/word_search/regrouping.dart @@ -21,49 +21,83 @@ List regroupWordSearchResults({ }) { final List results = []; - final commonEntryIds = linearWordQueryData.commonEntries + final Set commonEntryIds = linearWordQueryData.commonEntries .map((entry) => entry['entryId'] as int) .toSet(); + final Map>> entryReadingElementsByEntryId = + linearWordQueryData.readingElements.groupListsBy( + (element) => element['entryId'] as int, + ); + + final Map>> entryKanjiElementsByEntryId = + linearWordQueryData.kanjiElements.groupListsBy( + (element) => element['entryId'] as int, + ); + + final Map elementIdToEntryId = { + for (final element in linearWordQueryData.readingElements) + element['elementId'] as int: element['entryId'] as int, + for (final element in linearWordQueryData.kanjiElements) + element['elementId'] as int: element['entryId'] as int, + }; + + final Map>> entryReadingElementInfosByEntryId = + linearWordQueryData.readingElementInfos.groupListsBy( + (element) => elementIdToEntryId[element['elementId'] as int]!, + ); + + final Map>> entryKanjiElementInfosByEntryId = + linearWordQueryData.kanjiElementInfos.groupListsBy( + (element) => elementIdToEntryId[element['elementId'] as int]!, + ); + + final Map>> + entryReadingElementRestrictionsByEntryId = linearWordQueryData + .readingElementRestrictions + .groupListsBy( + (element) => elementIdToEntryId[element['elementId'] as int]!, + ); + + final Map entryJlptTagsByEntryId = linearWordQueryData + .jlptTags + .groupSetsBy((element) => element['entryId'] as int) + .map( + (final key, final value) => MapEntry( + key, + value.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)).min, + ), + ); + + final Map>> entrySensesByEntryId = + linearWordQueryData.senses.groupListsBy( + (element) => element['entryId'] as int, + ); + for (final scoredEntryId in entryIds) { - final List> entryReadingElements = linearWordQueryData - .readingElements - .where((element) => element['entryId'] == scoredEntryId.entryId) - .toList(); - - final List> entryKanjiElements = linearWordQueryData - .kanjiElements - .where((element) => element['entryId'] == scoredEntryId.entryId) - .toList(); - - final List> entryJlptTags = linearWordQueryData - .jlptTags - .where((element) => element['entryId'] == scoredEntryId.entryId) - .toList(); - - final jlptLevel = - entryJlptTags - .map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)) - .sorted((a, b) => b.compareTo(a)) - .firstOrNull ?? - JlptLevel.none; - - final isCommon = commonEntryIds.contains(scoredEntryId.entryId); - - final List> entrySenses = linearWordQueryData.senses - .where((element) => element['entryId'] == scoredEntryId.entryId) - .toList(); + final List> entryReadingElements = + entryReadingElementsByEntryId[scoredEntryId.entryId] ?? const []; + final List> entryKanjiElements = + entryKanjiElementsByEntryId[scoredEntryId.entryId] ?? const []; + final List> entryReadingElementInfos = + entryReadingElementInfosByEntryId[scoredEntryId.entryId] ?? const []; + final List> entryKanjiElementInfos = + entryKanjiElementInfosByEntryId[scoredEntryId.entryId] ?? const []; + final List> entryReadingElementRestrictions = + entryReadingElementRestrictionsByEntryId[scoredEntryId.entryId] ?? + const []; final GroupedWordResult entryReadingElementsGrouped = _regroupWords( - entryId: scoredEntryId.entryId, - readingElements: entryReadingElements, kanjiElements: entryKanjiElements, - readingElementInfos: linearWordQueryData.readingElementInfos, - readingElementRestrictions: - linearWordQueryData.readingElementRestrictions, - kanjiElementInfos: linearWordQueryData.kanjiElementInfos, + kanjiElementInfos: entryKanjiElementInfos, + readingElements: entryReadingElements, + readingElementInfos: entryReadingElementInfos, + readingElementRestrictions: entryReadingElementRestrictions, ); + final List> entrySenses = + entrySensesByEntryId[scoredEntryId.entryId] ?? const []; + final List entrySensesGrouped = _regroupSenses( senses: entrySenses, senseAntonyms: linearWordQueryData.senseAntonyms, @@ -82,6 +116,10 @@ List regroupWordSearchResults({ senseAntonymsXrefData: linearWordQueryData.senseAntonymData, ); + final bool isCommon = commonEntryIds.contains(scoredEntryId.entryId); + final JlptLevel jlptLevel = + entryJlptTagsByEntryId[scoredEntryId.entryId] ?? JlptLevel.none; + results.add( WordSearchResult( score: scoredEntryId.score, @@ -113,7 +151,6 @@ class GroupedWordResult { } GroupedWordResult _regroupWords({ - required int entryId, required List> kanjiElements, required List> kanjiElementInfos, required List> readingElements, @@ -122,36 +159,34 @@ GroupedWordResult _regroupWords({ }) { final List rubys = []; - final kanjiElements_ = kanjiElements - .where((element) => element['entryId'] == entryId) - .toList(); + final Map> readingElementRestrictionsSet = + readingElementRestrictions + .groupSetsBy((element) => element['elementId'] as int) + .map( + (key, value) => MapEntry( + key, + value.map((e) => e['restriction'] as String).toSet(), + ), + ); - final readingElements_ = readingElements - .where((element) => element['entryId'] == entryId) - .toList(); - - final readingElementRestrictions_ = readingElementRestrictions - .where((element) => element['entryId'] == entryId) - .toList(); - - for (final readingElement in readingElements_) { - if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) { + // Construct a cartesian product of kanji + readings, with exceptions made for items marked in `restrictions`. + for (final readingElement in readingElements) { + if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements.isEmpty) { final ruby = WordSearchRuby(base: readingElement['reading'] as String); rubys.add(ruby); continue; } - for (final kanjiElement in kanjiElements_) { + for (final kanjiElement in kanjiElements) { final kanji = kanjiElement['reading'] as String; final reading = readingElement['reading'] as String; - final restrictions = readingElementRestrictions_ - .where((element) => element['reading'] == reading) - .toList(); - - if (restrictions.isNotEmpty && - !restrictions.any((element) => element['restriction'] == kanji)) { + // The 'restrictions' act as an allowlist, meaning that non-matching kanji elements should be ignored. + final restrictions = + readingElementRestrictionsSet[readingElement['elementId'] as int] ?? + {}; + if (restrictions.isNotEmpty && !restrictions.contains(kanji)) { continue; } @@ -160,35 +195,30 @@ GroupedWordResult _regroupWords({ } } - assert(rubys.isNotEmpty, 'No readings found for entryId: $entryId'); + assert( + rubys.isNotEmpty, + 'No readings found for entryId: ${kanjiElements.firstOrNull?['entryId'] ?? readingElements.firstOrNull?['entryId'] ?? '???'}', + ); final Map readingElementIdsToReading = { - for (final element in readingElements_) + for (final element in readingElements) element['elementId'] as int: element['reading'] as String, }; final Map kanjiElementIdsToReading = { - for (final element in kanjiElements_) + for (final element in kanjiElements) element['elementId'] as int: element['reading'] as String, }; - final readingElementInfos_ = readingElementInfos - .where((element) => element['entryId'] == entryId) - .toList(); - - final kanjiElementInfos_ = kanjiElementInfos - .where((element) => element['entryId'] == entryId) - .toList(); - return GroupedWordResult( rubys: rubys, readingInfos: { - for (final rei in readingElementInfos_) + for (final rei in readingElementInfos) readingElementIdsToReading[rei['elementId'] as int]!: JMdictReadingInfo.fromId(rei['info'] as String), }, kanjiInfos: { - for (final kei in kanjiElementInfos_) + for (final kei in kanjiElementInfos) kanjiElementIdsToReading[kei['elementId'] as int]!: JMdictKanjiInfo.fromId(kei['info'] as String), },