Generate matchspans for word search results
Some checks failed
Run tests / evals (push) Failing after 12m29s
Build database / evals (push) Successful in 12m36s

This commit is contained in:
2026-02-24 16:54:37 +09:00
parent 00b963bfed
commit a696ed9733
4 changed files with 330 additions and 4 deletions

View File

@@ -0,0 +1,62 @@
/// The part of a word search result that a [WordSearchMatchSpan] points into.
enum WordSearchMatchSpanType { kanji, kana, sense }

/// A span of a word search result that corresponds to a match for a kanji, kana, or sense.
class WordSearchMatchSpan {
  /// Which subtype of the word search result this span corresponds to - either a kanji, a kana, or a sense.
  final WordSearchMatchSpanType spanType;

  /// The index of the kanji/kana/sense in the word search result that this span corresponds to.
  final int index;

  /// When matching a 'sense', this is the index of the English definition in that sense that this span corresponds to. Otherwise, this is always 0.
  final int subIndex;

  /// The start of the span (inclusive).
  final int start;

  /// The end of the span (exclusive), i.e. the offset just past the last
  /// matched character.
  final int end;

  WordSearchMatchSpan({
    required this.spanType,
    required this.index,
    required this.start,
    required this.end,
    this.subIndex = 0,
  });

  @override
  String toString() {
    return 'WordSearchMatchSpan(spanType: $spanType, index: $index, '
        'subIndex: $subIndex, start: $start, end: $end)';
  }

  /// Serializes this span to a JSON-compatible map.
  ///
  /// [spanType] is written as the bare enum value name (e.g. `'kanji'`).
  Map<String, Object?> toJson() => {
    'spanType': spanType.name,
    'index': index,
    'subIndex': subIndex,
    'start': start,
    'end': end,
  };

  /// Restores a span from the map produced by [toJson].
  ///
  /// A missing `'subIndex'` key is read as 0, so maps written before that key
  /// existed can still be loaded. Throws a [StateError] if `'spanType'` does
  /// not name a [WordSearchMatchSpanType] value.
  factory WordSearchMatchSpan.fromJson(Map<String, dynamic> json) =>
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.values.firstWhere(
          (type) => type.name == json['spanType'],
        ),
        index: json['index'] as int,
        subIndex: (json['subIndex'] as int?) ?? 0,
        start: json['start'] as int,
        end: json['end'] as int,
      );

  @override
  int get hashCode => Object.hash(spanType, index, subIndex, start, end);

  // Every field takes part in equality; in particular two sense spans that
  // point at different definitions of the same sense must not compare equal.
  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;
    return other is WordSearchMatchSpan &&
        other.spanType == spanType &&
        other.index == index &&
        other.subIndex == subIndex &&
        other.start == start &&
        other.end == end;
  }
}

View File

@@ -1,9 +1,11 @@
import 'package:jadb/models/common/jlpt_level.dart';
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
import 'package:jadb/models/word_search/word_search_match_span.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:jadb/search/word_search/word_search.dart';
/// A class representing a single dictionary entry from a word search.
class WordSearchResult {
@@ -34,11 +36,15 @@ class WordSearchResult {
/// A class listing the sources used to make up the data for this word search result.
final WordSearchSources sources;
// TODO: Create a list containing pointers to the matched parts of the word (either kanjiInfo, readingInfo, senses),
// as well as spans for the subpart of the string that matched. This will be used for highlighting, and displaying
// alternative kanji/kana forms later on.
/// A list of spans, specifying which part of this word result matched the search keyword.
///
/// Note that this is considered ephemeral data - it does not originate from the dictionary,
/// and unlike the rest of the class it varies based on external information (the searchword).
/// It will *NOT* be exported to JSON, but can be reinferred by invoking [inferMatchSpans] with
/// the original searchword.
List<WordSearchMatchSpan>? matchSpans;
const WordSearchResult({
WordSearchResult({
required this.score,
required this.entryId,
required this.isCommon,
@@ -48,6 +54,7 @@ class WordSearchResult {
required this.senses,
required this.jlptLevel,
required this.sources,
this.matchSpans,
});
Map<String, dynamic> toJson() => {
@@ -97,6 +104,65 @@ class WordSearchResult {
sources: WordSearchSources.empty(),
);
/// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
///
/// [searchword] is matched as a literal, case-sensitive substring (regex
/// metacharacters are escaped). Every non-overlapping occurrence yields one
/// span whose `start`/`end` are the match offsets, with `end` being exclusive.
/// Spans are emitted in this order: for each entry of `japanese`, matches in
/// its base (tagged `kanji`) then in its furigana (tagged `kana`); after that,
/// matches in every English definition of every sense (tagged `sense`, with
/// `subIndex` set to the definition's position within the sense).
///
/// An empty [searchword] produces an empty list, since an empty pattern would
/// otherwise match with zero length at every position of every string.
///
/// [searchMode] is accepted but not used yet (see the TODO below).
void inferMatchSpans(
  String searchword, {
  SearchMode searchMode = SearchMode.Auto,
}) {
  // TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
  // TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.
  if (searchword.isEmpty) {
    matchSpans = <WordSearchMatchSpan>[];
    return;
  }

  final pattern = RegExp(RegExp.escape(searchword));

  matchSpans = <WordSearchMatchSpan>[
    for (final (wordIndex, word) in japanese.indexed) ...[
      for (final match in pattern.allMatches(word.base))
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.kanji,
          index: wordIndex,
          start: match.start,
          end: match.end,
        ),
      // Furigana is optional; bind it to a local so no `!` is needed.
      if (word.furigana case final furigana?)
        for (final match in pattern.allMatches(furigana))
          WordSearchMatchSpan(
            spanType: WordSearchMatchSpanType.kana,
            index: wordIndex,
            start: match.start,
            end: match.end,
          ),
    ],
    for (final (senseIndex, sense) in senses.indexed)
      for (final (definitionIndex, definition)
          in sense.englishDefinitions.indexed)
        for (final match in pattern.allMatches(definition))
          WordSearchMatchSpan(
            spanType: WordSearchMatchSpanType.sense,
            index: senseIndex,
            subIndex: definitionIndex,
            start: match.start,
            end: match.end,
          ),
  ];
}
/// Formats [word] as its base text, followed by its furigana in parentheses
/// when furigana is present.
String _formatJapaneseWord(WordSearchRuby word) {
  if (word.furigana == null) {
    return word.base;
  }
  return '${word.base} (${word.furigana})';
}

View File

@@ -52,6 +52,10 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
linearWordQueryData: linearWordQueryData,
);
for (final resultEntry in result) {
resultEntry.inferMatchSpans(word, searchMode: searchMode);
}
return result;
}

View File

@@ -0,0 +1,194 @@
import 'package:jadb/models/common/jlpt_level.dart';
import 'package:jadb/models/word_search/word_search_match_span.dart';
import 'package:jadb/models/word_search/word_search_result.dart';
import 'package:jadb/models/word_search/word_search_ruby.dart';
import 'package:jadb/models/word_search/word_search_sense.dart';
import 'package:jadb/models/word_search/word_search_sources.dart';
import 'package:test/test.dart';
/// Tests for `WordSearchResult.inferMatchSpans`.
void main() {
  // Builds a result whose only meaningful content is [japanese] and [senses];
  // every other field gets a neutral placeholder value.
  WordSearchResult resultWith({
    required List<WordSearchRuby> japanese,
    List<WordSearchSense>? senses,
  }) => WordSearchResult(
    entryId: 0,
    score: 0,
    isCommon: false,
    jlptLevel: JlptLevel.none,
    kanjiInfo: {},
    readingInfo: {},
    japanese: japanese,
    senses: senses ?? [],
    sources: WordSearchSources.empty(),
  );

  // Builds a sense that carries nothing but the given English definitions.
  WordSearchSense senseWith(List<String> englishDefinitions) =>
      WordSearchSense(
        antonyms: [],
        dialects: [],
        englishDefinitions: englishDefinitions,
        fields: [],
        info: [],
        languageSource: [],
        misc: [],
        partsOfSpeech: [],
        restrictedToKanji: [],
        restrictedToReading: [],
        seeAlso: [],
      );

  test('Infer match whole word', () {
    final wordSearchResult = resultWith(
      japanese: [WordSearchRuby(base: '仮名')],
    );

    wordSearchResult.inferMatchSpans('仮名');

    expect(wordSearchResult.matchSpans, [
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.kanji,
        start: 0,
        end: 2,
        index: 0,
      ),
    ]);
  });

  test('Infer match part of word', () {
    final wordSearchResult = resultWith(
      japanese: [WordSearchRuby(base: '仮名')],
    );

    // Search for the first character only; the expected span is 0..1.
    wordSearchResult.inferMatchSpans('仮');

    expect(wordSearchResult.matchSpans, [
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.kanji,
        start: 0,
        end: 1,
        index: 0,
      ),
    ]);
  });

  test('Infer match in middle of word', () {
    final wordSearchResult = resultWith(
      japanese: [WordSearchRuby(base: 'ありがとう')],
    );

    wordSearchResult.inferMatchSpans('りがと');

    expect(wordSearchResult.matchSpans, [
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.kanji,
        start: 1,
        end: 4,
        index: 0,
      ),
    ]);
  });

  test('Infer match in furigana', () {
    final wordSearchResult = resultWith(
      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
    );

    wordSearchResult.inferMatchSpans('かな');

    expect(wordSearchResult.matchSpans, [
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.kana,
        start: 0,
        end: 2,
        index: 0,
      ),
    ]);
  });

  test('Infer match in sense', () {
    final wordSearchResult = resultWith(
      japanese: [WordSearchRuby(base: '仮名')],
      senses: [
        senseWith(['kana']),
      ],
    );

    wordSearchResult.inferMatchSpans('kana');

    expect(wordSearchResult.matchSpans, [
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.sense,
        start: 0,
        end: 4,
        index: 0,
      ),
    ]);
  });

  test('Infer multiple matches', () {
    final wordSearchResult = resultWith(
      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
      senses: [
        senseWith(['kana', 'the kana']),
      ],
    );

    wordSearchResult.inferMatchSpans('kana');

    expect(wordSearchResult.matchSpans, [
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.sense,
        start: 0,
        end: 4,
        index: 0,
      ),
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.sense,
        start: 4,
        end: 8,
        index: 0,
        subIndex: 1,
      ),
    ]);
  });
}