Generate matchspans for word search results

2026-02-24 16:54:37 +09:00
parent 00b963bfed
commit a696ed9733
4 changed files with 330 additions and 4 deletions
@@ -0,0 +1,62 @@
+/// The part of a word search result that a [WordSearchMatchSpan] points into.
+enum WordSearchMatchSpanType {
+  /// The span lies within a kanji form.
+  kanji,
+
+  /// The span lies within a kana form.
+  kana,
+
+  /// The span lies within an English definition of a sense.
+  sense,
+}
+
+/// A span of a word search result that corresponds to a match for a kanji, kana, or sense.
+///
+/// Two spans are equal only when every field, including [subIndex], is equal.
+class WordSearchMatchSpan {
+  /// Which subtype of the word search result this span corresponds to - either a kanji, a kana, or a sense.
+  final WordSearchMatchSpanType spanType;
+
+  /// The index of the kanji/kana/sense in the word search result that this span corresponds to.
+  final int index;
+
+  /// When matching a 'sense', this is the index of the English definition in that sense that this span corresponds to. Otherwise, this is always 0.
+  final int subIndex;
+
+  /// The start of the span (inclusive).
+  final int start;
+
+  /// The end of the span (exclusive), i.e. the offset just past the last matched character.
+  ///
+  /// A match covering the first two characters of a string has `start == 0` and `end == 2`.
+  final int end;
+
+  WordSearchMatchSpan({
+    required this.spanType,
+    required this.index,
+    required this.start,
+    required this.end,
+    this.subIndex = 0,
+  });
+
+  @override
+  String toString() {
+    return 'WordSearchMatchSpan(spanType: $spanType, index: $index, subIndex: $subIndex, start: $start, end: $end)';
+  }
+
+  /// Serializes this span to a JSON-compatible map; [spanType] is stored by its enum value name.
+  Map<String, Object?> toJson() => {
+    'spanType': spanType.name,
+    'index': index,
+    'subIndex': subIndex,
+    'start': start,
+    'end': end,
+  };
+
+  /// Restores a span from the map produced by [toJson].
+  ///
+  /// A missing `subIndex` key is read as 0, so maps written before that key existed still load.
+  /// Throws a [StateError] when `spanType` does not name a [WordSearchMatchSpanType] value.
+  factory WordSearchMatchSpan.fromJson(Map<String, dynamic> json) =>
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.values.firstWhere(
+          (e) => e.name == json['spanType'],
+        ),
+        index: json['index'] as int,
+        subIndex: json['subIndex'] as int? ?? 0,
+        start: json['start'] as int,
+        end: json['end'] as int,
+      );
+
+  // subIndex takes part in hashing and equality: without it, matches at the same
+  // offsets in two different definitions of one sense would be indistinguishable.
+  @override
+  int get hashCode => Object.hash(spanType, index, subIndex, start, end);
+
+  @override
+  bool operator ==(Object other) {
+    if (identical(this, other)) return true;
+    return other is WordSearchMatchSpan &&
+        other.spanType == spanType &&
+        other.index == index &&
+        other.subIndex == subIndex &&
+        other.start == start &&
+        other.end == end;
+  }
+}
@@ -1,9 +1,11 @@
 import 'package:jadb/models/common/jlpt_level.dart';
 import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
 import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
+import 'package:jadb/models/word_search/word_search_match_span.dart';
 import 'package:jadb/models/word_search/word_search_ruby.dart';
 import 'package:jadb/models/word_search/word_search_sense.dart';
 import 'package:jadb/models/word_search/word_search_sources.dart';
+import 'package:jadb/search/word_search/word_search.dart';

 /// A class representing a single dictionary entry from a word search.
 class WordSearchResult {
@@ -34,11 +36,15 @@ class WordSearchResult {
  /// A class listing the sources used to make up the data for this word search result.
  final WordSearchSources sources;

-  // TODO: Create a list containing pointers to the matched parts of the word (either kanjiInfo, readingInfo, senses),
-  //       as well as spans for the subpart of the string that matched. This will be used for highlighting, and displaying
-  //       alternative kanji/kana forms later on.
+  /// A list of spans, specifying which part of this word result matched the search keyword.
+  ///
+  /// Note that this is considered ephemeral data - it does not originate from the dictionary,
+  /// and unlike the rest of the class it varies based on external information (the searchword).
+  /// It will *NOT* be exported to JSON, but can be reinferred by invoking [inferMatchSpans] with
+  /// the original searchword.
+  List<WordSearchMatchSpan>? matchSpans;

-  const WordSearchResult({
+  WordSearchResult({
    required this.score,
    required this.entryId,
    required this.isCommon,
@@ -48,6 +54,7 @@ class WordSearchResult {
    required this.senses,
    required this.jlptLevel,
    required this.sources,
+    this.matchSpans,
  });

  Map<String, dynamic> toJson() => {
@@ -97,6 +104,65 @@ class WordSearchResult {
    sources: WordSearchSources.empty(),
  );

+  /// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
+  ///
+  /// Every literal occurrence of [searchword] is recorded, in this order: for each entry of
+  /// [japanese], matches in its base (always reported as kanji spans) followed by matches in
+  /// its furigana when present (reported as kana spans); then matches in each English
+  /// definition of each entry of [senses], with the definition's position stored as the
+  /// span's `subIndex`.
+  ///
+  /// Span offsets are taken directly from [Match.start] and [Match.end], so `start` is
+  /// inclusive and `end` is exclusive.
+  ///
+  /// An empty [searchword] sets [matchSpans] to an empty list.
+  ///
+  /// [searchMode] is accepted but not used yet.
+  void inferMatchSpans(
+    String searchword, {
+    SearchMode searchMode = SearchMode.Auto,
+  }) {
+    // TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
+    // TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.
+
+    // An empty pattern matches at every offset of every string, which would
+    // produce nothing but zero-length spans; report no matches instead.
+    if (searchword.isEmpty) {
+      matchSpans = [];
+      return;
+    }
+
+    // The keyword is escaped so that it is matched literally.
+    final regex = RegExp(RegExp.escape(searchword));
+
+    // Maps every occurrence of the keyword in [text] to a span with the given location.
+    Iterable<WordSearchMatchSpan> spansIn(
+      String text,
+      WordSearchMatchSpanType spanType,
+      int index, {
+      int subIndex = 0,
+    }) => regex
+        .allMatches(text)
+        .map(
+          (match) => WordSearchMatchSpan(
+            spanType: spanType,
+            index: index,
+            subIndex: subIndex,
+            start: match.start,
+            end: match.end,
+          ),
+        );
+
+    matchSpans = <WordSearchMatchSpan>[
+      for (final (i, ruby) in japanese.indexed) ...[
+        ...spansIn(ruby.base, WordSearchMatchSpanType.kanji, i),
+        if (ruby.furigana case final furigana?)
+          ...spansIn(furigana, WordSearchMatchSpanType.kana, i),
+      ],
+      for (final (i, sense) in senses.indexed)
+        for (final (k, definition) in sense.englishDefinitions.indexed)
+          ...spansIn(
+            definition,
+            WordSearchMatchSpanType.sense,
+            i,
+            subIndex: k,
+          ),
+    ];
+  }
+
  String _formatJapaneseWord(WordSearchRuby word) =>
      word.furigana == null ? word.base : '${word.base} (${word.furigana})';

@@ -52,6 +52,10 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
    linearWordQueryData: linearWordQueryData,
  );

+  for (final resultEntry in result) {
+    resultEntry.inferMatchSpans(word, searchMode: searchMode);
+  }
+
  return result;
 }

@@ -0,0 +1,194 @@
+import 'package:jadb/models/common/jlpt_level.dart';
+import 'package:jadb/models/word_search/word_search_match_span.dart';
+import 'package:jadb/models/word_search/word_search_result.dart';
+import 'package:jadb/models/word_search/word_search_ruby.dart';
+import 'package:jadb/models/word_search/word_search_sense.dart';
+import 'package:jadb/models/word_search/word_search_sources.dart';
+import 'package:test/test.dart';
+
+void main() {
+  test('Infer match whole word', () {
+    final result = _resultWith(japanese: [WordSearchRuby(base: '仮名')]);
+
+    result.inferMatchSpans('仮名');
+
+    expect(result.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kanji,
+        index: 0,
+        start: 0,
+        end: 2,
+      ),
+    ]);
+  });
+
+  test('Infer match part of word', () {
+    final result = _resultWith(japanese: [WordSearchRuby(base: '仮名')]);
+
+    result.inferMatchSpans('仮');
+
+    expect(result.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kanji,
+        index: 0,
+        start: 0,
+        end: 1,
+      ),
+    ]);
+  });
+
+  test('Infer match in middle of word', () {
+    final result = _resultWith(japanese: [WordSearchRuby(base: 'ありがとう')]);
+
+    result.inferMatchSpans('りがと');
+
+    expect(result.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kanji,
+        index: 0,
+        start: 1,
+        end: 4,
+      ),
+    ]);
+  });
+
+  test('Infer match in furigana', () {
+    final result = _resultWith(
+      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
+    );
+
+    result.inferMatchSpans('かな');
+
+    expect(result.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.kana,
+        index: 0,
+        start: 0,
+        end: 2,
+      ),
+    ]);
+  });
+
+  test('Infer match in sense', () {
+    final result = _resultWith(
+      japanese: [WordSearchRuby(base: '仮名')],
+      senses: [
+        _senseWithDefinitions(['kana']),
+      ],
+    );
+
+    result.inferMatchSpans('kana');
+
+    expect(result.matchSpans, [
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.sense,
+        index: 0,
+        start: 0,
+        end: 4,
+      ),
+    ]);
+  });
+
+  test('Infer multiple matches', () {
+    final result = _resultWith(
+      japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
+      senses: [
+        _senseWithDefinitions(['kana', 'the kana']),
+      ],
+    );
+
+    result.inferMatchSpans('kana');
+
+    expect(result.matchSpans, [
+      // 'kana' as the whole first definition.
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.sense,
+        index: 0,
+        start: 0,
+        end: 4,
+      ),
+      // 'kana' following 'the ' in the second definition.
+      WordSearchMatchSpan(
+        spanType: WordSearchMatchSpanType.sense,
+        index: 0,
+        subIndex: 1,
+        start: 4,
+        end: 8,
+      ),
+    ]);
+  });
+}
+
+/// Builds a [WordSearchResult] fixture in which only [japanese] and [senses]
+/// vary; every other field gets a fixed placeholder value.
+WordSearchResult _resultWith({
+  required List<WordSearchRuby> japanese,
+  List<WordSearchSense>? senses,
+}) => WordSearchResult(
+  entryId: 0,
+  score: 0,
+  isCommon: false,
+  jlptLevel: JlptLevel.none,
+  kanjiInfo: {},
+  readingInfo: {},
+  japanese: japanese,
+  senses: senses ?? [],
+  sources: WordSearchSources.empty(),
+);
+
+/// Builds a [WordSearchSense] whose only populated field is its English
+/// definitions; all other lists are left empty.
+WordSearchSense _senseWithDefinitions(List<String> englishDefinitions) =>
+    WordSearchSense(
+      antonyms: [],
+      dialects: [],
+      englishDefinitions: englishDefinitions,
+      fields: [],
+      info: [],
+      languageSource: [],
+      misc: [],
+      partsOfSpeech: [],
+      restrictedToKanji: [],
+      restrictedToReading: [],
+      seeAlso: [],
+    );