Generate matchspans for word search results
This commit is contained in:
62
lib/models/word_search/word_search_match_span.dart
Normal file
62
lib/models/word_search/word_search_match_span.dart
Normal file
@@ -0,0 +1,62 @@
|
||||
/// The part of a word search result that a [WordSearchMatchSpan] points into.
enum WordSearchMatchSpanType { kanji, kana, sense }

/// A span of a word search result that corresponds to a match for a kanji, kana, or sense.
///
/// Two spans are equal when all of [spanType], [index], [subIndex], [start]
/// and [end] are equal.
class WordSearchMatchSpan {
  /// Which subtype of the word search result this span corresponds to - either a kanji, a kana, or a sense.
  final WordSearchMatchSpanType spanType;

  /// The index of the kanji/kana/sense in the word search result that this span corresponds to.
  final int index;

  /// When matching a 'sense', this is the index of the English definition in that sense that this span corresponds to. Otherwise, this is always 0.
  final int subIndex;

  /// The start of the span (inclusive).
  final int start;

  /// The end of the span (exclusive), i.e. the offset just past the last
  /// matched code unit, so that `end - start` is the length of the match.
  final int end;

  WordSearchMatchSpan({
    required this.spanType,
    required this.index,
    required this.start,
    required this.end,
    this.subIndex = 0,
  });

  @override
  String toString() {
    return 'WordSearchMatchSpan(spanType: $spanType, index: $index, subIndex: $subIndex, start: $start, end: $end)';
  }

  /// Serializes this span; the span type is stored by its enum value name.
  Map<String, Object?> toJson() => {
    'spanType': spanType.name,
    'index': index,
    'subIndex': subIndex,
    'start': start,
    'end': end,
  };

  /// Restores a span from the map produced by [toJson].
  ///
  /// A missing `subIndex` key is read as 0, so maps written before that key
  /// was serialized still load. Throws a [StateError] when `spanType` does
  /// not name a [WordSearchMatchSpanType] value.
  factory WordSearchMatchSpan.fromJson(Map<String, dynamic> json) =>
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.values.firstWhere(
          (e) => e.name == json['spanType'],
        ),
        index: json['index'] as int,
        subIndex: json['subIndex'] as int? ?? 0,
        start: json['start'] as int,
        end: json['end'] as int,
      );

  @override
  int get hashCode => Object.hash(spanType, index, subIndex, start, end);

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;
    // subIndex must take part in equality: matches in different definitions
    // of the same sense can share spanType, index, start and end.
    return other is WordSearchMatchSpan &&
        other.spanType == spanType &&
        other.index == index &&
        other.subIndex == subIndex &&
        other.start == start &&
        other.end == end;
  }
}
|
||||
@@ -1,9 +1,11 @@
|
||||
import 'package:jadb/models/common/jlpt_level.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
|
||||
import 'package:jadb/models/word_search/word_search_match_span.dart';
|
||||
import 'package:jadb/models/word_search/word_search_ruby.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
import 'package:jadb/search/word_search/word_search.dart';
|
||||
|
||||
/// A class representing a single dictionary entry from a word search.
|
||||
class WordSearchResult {
|
||||
@@ -34,11 +36,15 @@ class WordSearchResult {
|
||||
/// A class listing the sources used to make up the data for this word search result.
|
||||
final WordSearchSources sources;
|
||||
|
||||
// TODO: Create a list containing pointers to the matched parts of the word (either kanjiInfo, readingInfo, senses),
|
||||
// as well as spans for the subpart of the string that matched. This will be used for highlighting, and displaying
|
||||
// alternative kanji/kana forms later on.
|
||||
/// A list of spans, specifying which part of this word result matched the search keyword.
|
||||
///
|
||||
/// Note that this is considered ephemeral data - it does not originate from the dictionary,
|
||||
/// and unlike the rest of the class it varies based on external information (the searchword).
|
||||
/// It will *NOT* be exported to JSON, but can be reinferred by invoking [inferMatchSpans] with
|
||||
/// the original searchword.
|
||||
List<WordSearchMatchSpan>? matchSpans;
|
||||
|
||||
const WordSearchResult({
|
||||
WordSearchResult({
|
||||
required this.score,
|
||||
required this.entryId,
|
||||
required this.isCommon,
|
||||
@@ -48,6 +54,7 @@ class WordSearchResult {
|
||||
required this.senses,
|
||||
required this.jlptLevel,
|
||||
required this.sources,
|
||||
this.matchSpans,
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
@@ -97,6 +104,65 @@ class WordSearchResult {
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
/// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
///
/// Every non-overlapping literal occurrence of [searchword] is recorded, in
/// this order: for each entry of `japanese`, occurrences in its base
/// (as [WordSearchMatchSpanType.kanji]) and then in its furigana when present
/// (as [WordSearchMatchSpanType.kana]); afterwards, for each sense, occurrences
/// in each English definition (as [WordSearchMatchSpanType.sense], with the
/// definition's position stored in `subIndex`).
///
/// Matching is case-sensitive. An empty [searchword] yields an empty list.
/// [searchMode] is accepted but not consulted yet.
void inferMatchSpans(
  String searchword, {
  SearchMode searchMode = SearchMode.Auto,
}) {
  // TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
  // TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.

  // An empty pattern matches at every position of every string, which would
  // fill the result with meaningless zero-length spans.
  if (searchword.isEmpty) {
    matchSpans = <WordSearchMatchSpan>[];
    return;
  }

  final regex = RegExp(RegExp.escape(searchword));

  // Turns each occurrence of the search word in [text] into a span that
  // points at the given part of this result.
  Iterable<WordSearchMatchSpan> spansIn(
    String text,
    WordSearchMatchSpanType spanType,
    int index, {
    int subIndex = 0,
  }) {
    return regex.allMatches(text).map(
      (match) => WordSearchMatchSpan(
        spanType: spanType,
        index: index,
        subIndex: subIndex,
        start: match.start,
        end: match.end,
      ),
    );
  }

  final spans = <WordSearchMatchSpan>[];

  for (final (i, ruby) in japanese.indexed) {
    spans.addAll(spansIn(ruby.base, WordSearchMatchSpanType.kanji, i));

    final furigana = ruby.furigana;
    if (furigana != null) {
      spans.addAll(spansIn(furigana, WordSearchMatchSpanType.kana, i));
    }
  }

  for (final (i, sense) in senses.indexed) {
    for (final (k, definition) in sense.englishDefinitions.indexed) {
      spans.addAll(
        spansIn(definition, WordSearchMatchSpanType.sense, i, subIndex: k),
      );
    }
  }

  matchSpans = spans;
}
|
||||
|
||||
/// Renders [word] as text: the base alone when it has no furigana, otherwise
/// the base followed by the furigana in parentheses.
String _formatJapaneseWord(WordSearchRuby word) {
  final reading = word.furigana;
  if (reading == null) {
    return word.base;
  }
  return '${word.base} ($reading)';
}
|
||||
|
||||
|
||||
@@ -52,6 +52,10 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
||||
linearWordQueryData: linearWordQueryData,
|
||||
);
|
||||
|
||||
for (final resultEntry in result) {
|
||||
resultEntry.inferMatchSpans(word, searchMode: searchMode);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
194
test/search/search_match_inference_test.dart
Normal file
194
test/search/search_match_inference_test.dart
Normal file
@@ -0,0 +1,194 @@
|
||||
import 'package:jadb/models/common/jlpt_level.dart';
|
||||
import 'package:jadb/models/word_search/word_search_match_span.dart';
|
||||
import 'package:jadb/models/word_search/word_search_result.dart';
|
||||
import 'package:jadb/models/word_search/word_search_ruby.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
/// Tests for [WordSearchResult.inferMatchSpans].
///
/// Each case builds a minimal result, infers the spans for one search word,
/// and compares [WordSearchResult.matchSpans] against the exact expected list.
void main() {
  // Builds a result in which only [japanese] and [senses] vary; every other
  // field gets the same placeholder value in all tests.
  WordSearchResult resultWith({
    required List<WordSearchRuby> japanese,
    List<WordSearchSense>? senses,
  }) {
    return WordSearchResult(
      entryId: 0,
      score: 0,
      isCommon: false,
      jlptLevel: JlptLevel.none,
      kanjiInfo: {},
      readingInfo: {},
      japanese: japanese,
      senses: senses ?? [],
      sources: WordSearchSources.empty(),
    );
  }

  // Builds a sense that carries nothing but the given English definitions.
  WordSearchSense senseDefinedAs(List<String> englishDefinitions) {
    return WordSearchSense(
      antonyms: [],
      dialects: [],
      englishDefinitions: englishDefinitions,
      fields: [],
      info: [],
      languageSource: [],
      misc: [],
      partsOfSpeech: [],
      restrictedToKanji: [],
      restrictedToReading: [],
      seeAlso: [],
    );
  }

  test('Infer match whole word', () {
    final result = resultWith(japanese: [WordSearchRuby(base: '仮名')])
      ..inferMatchSpans('仮名');

    expect(
      result.matchSpans,
      equals([
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.kanji,
          index: 0,
          start: 0,
          end: 2,
        ),
      ]),
    );
  });

  test('Infer match part of word', () {
    final result = resultWith(japanese: [WordSearchRuby(base: '仮名')])
      ..inferMatchSpans('仮');

    expect(
      result.matchSpans,
      equals([
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.kanji,
          index: 0,
          start: 0,
          end: 1,
        ),
      ]),
    );
  });

  test('Infer match in middle of word', () {
    final result = resultWith(japanese: [WordSearchRuby(base: 'ありがとう')])
      ..inferMatchSpans('りがと');

    expect(
      result.matchSpans,
      equals([
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.kanji,
          index: 0,
          start: 1,
          end: 4,
        ),
      ]),
    );
  });

  test('Infer match in furigana', () {
    final result = resultWith(
      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
    )..inferMatchSpans('かな');

    expect(
      result.matchSpans,
      equals([
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.kana,
          index: 0,
          start: 0,
          end: 2,
        ),
      ]),
    );
  });

  test('Infer match in sense', () {
    final result = resultWith(
      japanese: [WordSearchRuby(base: '仮名')],
      senses: [
        senseDefinedAs(['kana']),
      ],
    )..inferMatchSpans('kana');

    expect(
      result.matchSpans,
      equals([
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.sense,
          index: 0,
          start: 0,
          end: 4,
        ),
      ]),
    );
  });

  test('Infer multiple matches', () {
    final result = resultWith(
      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
      senses: [
        senseDefinedAs(['kana', 'the kana']),
      ],
    )..inferMatchSpans('kana');

    expect(
      result.matchSpans,
      equals([
        // First definition: 'kana' matches the whole string.
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.sense,
          index: 0,
          start: 0,
          end: 4,
        ),
        // Second definition: 'kana' follows the four characters of 'the '.
        WordSearchMatchSpan(
          spanType: WordSearchMatchSpanType.sense,
          index: 0,
          subIndex: 1,
          start: 4,
          end: 8,
        ),
      ]),
    );
  });
}
|
||||
Reference in New Issue
Block a user