From a696ed9733f39d66fdab5d8707ef625074e7b7ae Mon Sep 17 00:00:00 2001
From: h7x4
Date: Tue, 24 Feb 2026 16:54:37 +0900
Subject: [PATCH] Generate matchspans for word search results

---
 .../word_search/word_search_match_span.dart   |  71 +++++
 .../word_search/word_search_result.dart       |  71 ++++-
 lib/search/word_search/word_search.dart       |   4 +
 test/search/search_match_inference_test.dart  | 242 ++++++++++++++++++
 4 files changed, 384 insertions(+), 4 deletions(-)
 create mode 100644 lib/models/word_search/word_search_match_span.dart
 create mode 100644 test/search/search_match_inference_test.dart

diff --git a/lib/models/word_search/word_search_match_span.dart b/lib/models/word_search/word_search_match_span.dart
new file mode 100644
index 0000000..58c95e8
--- /dev/null
+++ b/lib/models/word_search/word_search_match_span.dart
@@ -0,0 +1,71 @@
+/// The part of a word search result that a [WordSearchMatchSpan] points into.
+enum WordSearchMatchSpanType { kanji, kana, sense }
+
+/// A span of a word search result that corresponds to a match for a kanji, kana, or sense.
+class WordSearchMatchSpan {
+  /// Which subtype of the word search result this span corresponds to - either a kanji, a kana, or a sense.
+  final WordSearchMatchSpanType spanType;
+
+  /// The index of the kanji/kana/sense in the word search result that this span corresponds to.
+  final int index;
+
+  /// When matching a 'sense', this is the index of the English definition in that sense that this span corresponds to. Otherwise, this is always 0.
+  final int subIndex;
+
+  /// The start of the span (inclusive).
+  final int start;
+
+  /// The end of the span (exclusive), so that `end - start` is the length of the span.
+  final int end;
+
+  WordSearchMatchSpan({
+    required this.spanType,
+    required this.index,
+    required this.start,
+    required this.end,
+    this.subIndex = 0,
+  });
+
+  @override
+  String toString() {
+    return 'WordSearchMatchSpan(spanType: $spanType, index: $index, subIndex: $subIndex, start: $start, end: $end)';
+  }
+
+  /// Serializes this span to a JSON-compatible map, storing [spanType] by its enum name.
+  Map<String, Object?> toJson() => {
+    'spanType': spanType.name,
+    'index': index,
+    'subIndex': subIndex,
+    'start': start,
+    'end': end,
+  };
+
+  /// Reconstructs a span from a map in the shape produced by [toJson].
+  ///
+  /// A missing `subIndex` key is read as 0. Throws a [StateError] if
+  /// `spanType` is not the name of a [WordSearchMatchSpanType] value.
+  factory WordSearchMatchSpan.fromJson(Map<String, Object?> json) =>
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.values.firstWhere(
+          (e) => e.name == json['spanType'],
+        ),
+        index: json['index'] as int,
+        subIndex: (json['subIndex'] as int?) ?? 0,
+        start: json['start'] as int,
+        end: json['end'] as int,
+      );
+
+  @override
+  int get hashCode => Object.hash(spanType, index, subIndex, start, end);
+
+  @override
+  bool operator ==(Object other) {
+    if (identical(this, other)) return true;
+    return other is WordSearchMatchSpan &&
+        other.spanType == spanType &&
+        other.index == index &&
+        other.subIndex == subIndex &&
+        other.start == start &&
+        other.end == end;
+  }
+}
diff --git a/lib/models/word_search/word_search_result.dart b/lib/models/word_search/word_search_result.dart
index 261dc22..59bccd1 100644
--- a/lib/models/word_search/word_search_result.dart
+++ b/lib/models/word_search/word_search_result.dart
@@ -1,9 +1,11 @@
 import 'package:jadb/models/common/jlpt_level.dart';
 import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
 import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
+import 'package:jadb/models/word_search/word_search_match_span.dart';
 import 'package:jadb/models/word_search/word_search_ruby.dart';
 import 'package:jadb/models/word_search/word_search_sense.dart';
 import 'package:jadb/models/word_search/word_search_sources.dart';
+import 'package:jadb/search/word_search/word_search.dart';
 
 /// A class representing a single dictionary entry from a word search.
 class WordSearchResult {
@@ -34,11 +36,15 @@ class WordSearchResult {
   /// A class listing the sources used to make up the data for this word search result.
   final WordSearchSources sources;
 
-  // TODO: Create a list containing pointers to the matched parts of the word (either kanjiInfo, readingInfo, senses),
-  // as well as spans for the subpart of the string that matched. This will be used for highlighting, and displaying
-  // alternative kanji/kana forms later on.
+  /// A list of spans, specifying which part of this word result matched the search keyword.
+  ///
+  /// Note that this is considered ephemeral data - it does not originate from the dictionary,
+  /// and unlike the rest of the class it varies based on external information (the searchword).
+  /// It will *NOT* be exported to JSON, but can be reinferred by invoking [inferMatchSpans] with
+  /// the original searchword.
+  List<WordSearchMatchSpan>? matchSpans;
 
-  const WordSearchResult({
+  WordSearchResult({
     required this.score,
     required this.entryId,
     required this.isCommon,
@@ -48,6 +54,7 @@ class WordSearchResult {
     required this.senses,
     required this.jlptLevel,
     required this.sources,
+    this.matchSpans,
   });
 
   Map toJson() => {
@@ -97,6 +104,62 @@ class WordSearchResult {
     sources: WordSearchSources.empty(),
   );
 
+  /// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
+  ///
+  /// Every literal, non-overlapping occurrence of [searchword] in the base or
+  /// furigana of a [japanese] entry, or in an English definition of one of the
+  /// [senses], yields one [WordSearchMatchSpan]. Any previously inferred spans
+  /// are replaced, and an empty [searchword] yields no spans at all.
+  ///
+  /// [searchMode] is accepted but not yet used.
+  void inferMatchSpans(
+    String searchword, {
+    SearchMode searchMode = SearchMode.Auto,
+  }) {
+    // TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
+    // TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.
+
+    // An empty pattern matches at every position of every string, which would
+    // yield a zero-length span per position instead of "nothing matched".
+    if (searchword.isEmpty) {
+      matchSpans = [];
+      return;
+    }
+
+    // NOTE(review): matching is literal and case-sensitive - confirm that this
+    // agrees with how the search itself matches English definitions.
+    final regex = RegExp(RegExp.escape(searchword));
+
+    // Builds one span per occurrence of the search word in [text].
+    Iterable<WordSearchMatchSpan> spansIn(
+      String text,
+      WordSearchMatchSpanType spanType,
+      int index, [
+      int subIndex = 0,
+    ]) => regex
+        .allMatches(text)
+        .map(
+          (match) => WordSearchMatchSpan(
+            spanType: spanType,
+            index: index,
+            subIndex: subIndex,
+            start: match.start,
+            end: match.end,
+          ),
+        );
+
+    matchSpans = [
+      for (final (i, word) in japanese.indexed) ...[
+        ...spansIn(word.base, WordSearchMatchSpanType.kanji, i),
+        if (word.furigana case final furigana?)
+          ...spansIn(furigana, WordSearchMatchSpanType.kana, i),
+      ],
+      for (final (i, sense) in senses.indexed)
+        for (final (k, definition) in sense.englishDefinitions.indexed)
+          ...spansIn(definition, WordSearchMatchSpanType.sense, i, k),
+    ];
+  }
+
   String _formatJapaneseWord(WordSearchRuby word) =>
       word.furigana == null ? word.base : '${word.base} (${word.furigana})';
 
diff --git a/lib/search/word_search/word_search.dart b/lib/search/word_search/word_search.dart
index 1d7d8ed..86a8ce9 100644
--- a/lib/search/word_search/word_search.dart
+++ b/lib/search/word_search/word_search.dart
@@ -52,6 +52,10 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
     linearWordQueryData: linearWordQueryData,
   );
 
+  for (final resultEntry in result) {
+    resultEntry.inferMatchSpans(word, searchMode: searchMode);
+  }
+
   return result;
 }
 
diff --git a/test/search/search_match_inference_test.dart b/test/search/search_match_inference_test.dart
new file mode 100644
index 0000000..5145ed7
--- /dev/null
+++ b/test/search/search_match_inference_test.dart
@@ -0,0 +1,242 @@
+import 'package:jadb/models/common/jlpt_level.dart';
+import 'package:jadb/models/word_search/word_search_match_span.dart';
+import 'package:jadb/models/word_search/word_search_result.dart';
+import 'package:jadb/models/word_search/word_search_ruby.dart';
+import 'package:jadb/models/word_search/word_search_sense.dart';
+import 'package:jadb/models/word_search/word_search_sources.dart';
+import 'package:test/test.dart';
+
+void main() {
+  test('Infer match whole word', () {
+    final wordSearchResult = WordSearchResult(
+      entryId: 0,
+      score: 0,
+      isCommon: false,
+      jlptLevel: JlptLevel.none,
+      kanjiInfo: {},
+      readingInfo: {},
+      japanese: [WordSearchRuby(base: '仮名')],
+      senses: [],
+      sources: WordSearchSources.empty(),
+    );
+
+    wordSearchResult.inferMatchSpans('仮名');
+
+    expect(wordSearchResult.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kanji,
+        start: 0,
+        end: 2,
+        index: 0,
+      ),
+    ]);
+  });
+
+  test('Infer match part of word', () {
+    final wordSearchResult = WordSearchResult(
+      entryId: 0,
+      score: 0,
+      isCommon: false,
+      jlptLevel: JlptLevel.none,
+      kanjiInfo: {},
+      readingInfo: {},
+      japanese: [WordSearchRuby(base: '仮名')],
+      senses: [],
+      sources: WordSearchSources.empty(),
+    );
+
+    wordSearchResult.inferMatchSpans('仮');
+
+    expect(wordSearchResult.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kanji,
+        start: 0,
+        end: 1,
+        index: 0,
+      ),
+    ]);
+  });
+
+  test('Infer match in middle of word', () {
+    final wordSearchResult = WordSearchResult(
+      entryId: 0,
+      score: 0,
+      isCommon: false,
+      jlptLevel: JlptLevel.none,
+      kanjiInfo: {},
+      readingInfo: {},
+      japanese: [WordSearchRuby(base: 'ありがとう')],
+      senses: [],
+      sources: WordSearchSources.empty(),
+    );
+
+    wordSearchResult.inferMatchSpans('りがと');
+
+    expect(wordSearchResult.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kanji,
+        start: 1,
+        end: 4,
+        index: 0,
+      ),
+    ]);
+  });
+
+  test('Infer match in furigana', () {
+    final wordSearchResult = WordSearchResult(
+      entryId: 0,
+      score: 0,
+      isCommon: false,
+      jlptLevel: JlptLevel.none,
+      kanjiInfo: {},
+      readingInfo: {},
+      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
+      senses: [],
+      sources: WordSearchSources.empty(),
+    );
+
+    wordSearchResult.inferMatchSpans('かな');
+
+    expect(wordSearchResult.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kana,
+        start: 0,
+        end: 2,
+        index: 0,
+      ),
+    ]);
+  });
+
+  test('Infer match in sense', () {
+    final wordSearchResult = WordSearchResult(
+      entryId: 0,
+      score: 0,
+      isCommon: false,
+      jlptLevel: JlptLevel.none,
+      kanjiInfo: {},
+      readingInfo: {},
+      japanese: [WordSearchRuby(base: '仮名')],
+      senses: [
+        WordSearchSense(
+          antonyms: [],
+          dialects: [],
+          englishDefinitions: ['kana'],
+          fields: [],
+          info: [],
+          languageSource: [],
+          misc: [],
+          partsOfSpeech: [],
+          restrictedToKanji: [],
+          restrictedToReading: [],
+          seeAlso: [],
+        ),
+      ],
+      sources: WordSearchSources.empty(),
+    );
+
+    wordSearchResult.inferMatchSpans('kana');
+
+    expect(wordSearchResult.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.sense,
+        start: 0,
+        end: 4,
+        index: 0,
+      ),
+    ]);
+  });
+
+  test('Infer multiple matches', () {
+    final wordSearchResult = WordSearchResult(
+      entryId: 0,
+      score: 0,
+      isCommon: false,
+      jlptLevel: JlptLevel.none,
+      kanjiInfo: {},
+      readingInfo: {},
+      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
+      senses: [
+        WordSearchSense(
+          antonyms: [],
+          dialects: [],
+          englishDefinitions: ['kana', 'the kana'],
+          fields: [],
+          info: [],
+          languageSource: [],
+          misc: [],
+          partsOfSpeech: [],
+          restrictedToKanji: [],
+          restrictedToReading: [],
+          seeAlso: [],
+        ),
+      ],
+      sources: WordSearchSources.empty(),
+    );
+
+    wordSearchResult.inferMatchSpans('kana');
+
+    expect(wordSearchResult.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.sense,
+        start: 0,
+        end: 4,
+        index: 0,
+      ),
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.sense,
+        start: 4,
+        end: 8,
+        index: 0,
+        subIndex: 1,
+      ),
+    ]);
+  });
+
+  test('Infer no matches for empty search word', () {
+    final wordSearchResult = WordSearchResult(
+      entryId: 0,
+      score: 0,
+      isCommon: false,
+      jlptLevel: JlptLevel.none,
+      kanjiInfo: {},
+      readingInfo: {},
+      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
+      senses: [],
+      sources: WordSearchSources.empty(),
+    );
+
+    wordSearchResult.inferMatchSpans('');
+
+    expect(wordSearchResult.matchSpans, isEmpty);
+  });
+
+  test('Match spans differing only in subIndex are not equal', () {
+    final first = WordSearchMatchSpan(
+      spanType: WordSearchMatchSpanType.sense,
+      start: 0,
+      end: 4,
+      index: 0,
+    );
+    final second = WordSearchMatchSpan(
+      spanType: WordSearchMatchSpanType.sense,
+      start: 0,
+      end: 4,
+      index: 0,
+      subIndex: 1,
+    );
+
+    expect(first, isNot(equals(second)));
+  });
+
+  test('Match span survives a JSON round trip', () {
+    final span = WordSearchMatchSpan(
+      spanType: WordSearchMatchSpanType.sense,
+      start: 4,
+      end: 8,
+      index: 0,
+      subIndex: 1,
+    );
+
+    expect(WordSearchMatchSpan.fromJson(span.toJson()), span);
+  });
+}