WIP

2026-04-13 21:10:56 +09:00
12 changed files with 28 additions and 144 deletions
@@ -16,14 +16,15 @@ jobs:
      uses: https://github.com/cachix/install-nix-action@v31
      with:
        extra_nix_config: |
+          experimental-features = nix-command flakes
          show-trace = true
          max-jobs = auto
          trusted-users = root
          experimental-features = nix-command flakes
          build-users-group =

-    # - name: Update database inputs
-    #   run: nix flake update datasources
+    - name: Update database inputs
+      run: nix flake update datasources

    - name: Build database
      run: nix build .#database -L
@@ -22,11 +22,11 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1777954456,
-        "narHash": "sha256-hGdgeU2Nk87RAuZyYjyDjFL6LK7dAZN5RE9+hrDTkDU=",
+        "lastModified": 1775423009,
+        "narHash": "sha256-vPKLpjhIVWdDrfiUM8atW6YkIggCEKdSAlJPzzhkQlw=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "549bd84d6279f9852cae6225e372cc67fb91a4c1",
+        "rev": "68d8aa3d661f0e6bd5862291b5bb263b2a6595c9",
        "type": "github"
      },
      "original": {
@@ -116,8 +116,6 @@
        ln -s ${src} $out
      '';

-      fts5-icu-tokenizer = pkgs.callPackage ./nix/fts5-icu-tokenizer/package.nix { };
-
      inherit (datasources.packages.${system}) jmdict radkfile kanjidic2;

      database-tool = pkgs.callPackage ./nix/database_tool.nix {
@@ -27,44 +27,15 @@ ResolvedXref resolveXref(
  SplayTreeMap<String, Set<Entry>> entriesByReading,
  XRefParts xref,
 ) {
-  late List<Entry> candidateEntries;
-  switch ((xref.kanjiRef, xref.readingRef)) {
-    case (null, null):
-      throw Exception('Xref $xref has no kanji or reading reference');
-
-    case (final String k, null):
-      if (!entriesByKanji.containsKey(k)) {
-        throw Exception(
-          'Xref $xref has kanji reference "$k" but no entries found with that kanji',
-        );
-      }
-      candidateEntries = entriesByKanji[k]!.toList();
-      break;
-
-    case (null, final String r):
-      if (!entriesByReading.containsKey(r)) {
-        throw Exception(
-          'Xref $xref has reading reference "$r" but no entries found with that reading',
-        );
-      }
-      candidateEntries = entriesByReading[r]!.toList();
-      break;
-
-    case (final String k, final String r):
-      if (!entriesByKanji.containsKey(k)) {
-        throw Exception(
-          'Xref $xref has kanji reference "$k" but no entries found with that kanji',
-        );
-      }
-      if (!entriesByReading.containsKey(r)) {
-        throw Exception(
-          'Xref $xref has reading reference "$r" but no entries found with that reading',
-        );
-      }
-      candidateEntries = entriesByKanji[k]!
-          .intersection(entriesByReading[r]!)
-          .toList();
-  }
+  // Looks up the entries written with kanji [k]. A missing key raises a
+  // descriptive Exception instead of tripping a bare null assertion, so a
+  // dangling cross-reference can be traced back to the offending xref.
+  Set<Entry> kanjiMatches(String k) =>
+      entriesByKanji[k] ??
+      (throw Exception(
+        'Xref $xref has kanji reference "$k" but no entries found with that kanji',
+      ));
+
+  // Looks up the entries read as [r]; same failure contract as above.
+  Set<Entry> readingMatches(String r) =>
+      entriesByReading[r] ??
+      (throw Exception(
+        'Xref $xref has reading reference "$r" but no entries found with that reading',
+      ));
+
+  // Candidate entries are those matching every reference the xref provides.
+  // When both are present the kanji lookup runs first (it is the receiver of
+  // `intersection`), so a doubly-dangling xref reports the kanji problem.
+  List<Entry> candidateEntries = switch ((xref.kanjiRef, xref.readingRef)) {
+    (null, null) => throw Exception(
+      'Xref $xref has no kanji or reading reference',
+    ),
+    (final String k, null) => kanjiMatches(k).toList(),
+    (null, final String r) => readingMatches(r).toList(),
+    (final String k, final String r) =>
+      kanjiMatches(k).intersection(readingMatches(r)).toList(),
+  };

  // Filter out entries that don't have the number of senses specified in the xref
  if (xref.senseOrderNum != null) {
@@ -18,12 +18,12 @@ extension JaDBConnection on DatabaseExecutor {
      searchKanjiWithDbConnection(this, kanji);

  /// Search for a kanji in the database.
-  Future<Map<String, KanjiSearchResult>> jadbGetManyKanji(Iterable<String> kanji) =>
+  Future<Map<String, KanjiSearchResult>> jadbGetManyKanji(Set<String> kanji) =>
      searchManyKanjiWithDbConnection(this, kanji);

  /// Filter a list of characters, and return the ones that are listed in the kanji dictionary.
  Future<List<String>> filterKanji(
-    Iterable<String> kanji, {
+    List<String> kanji, {
    bool deduplicate = false,
  }) => filterKanjiWithDbConnection(this, kanji, deduplicate);

@@ -6,7 +6,7 @@ import 'package:sqflite_common/sqflite.dart';
 /// If [deduplicate] is true, the returned list will deduplicate the input kanji list before returning the filtered results.
 Future<List<String>> filterKanjiWithDbConnection(
  DatabaseExecutor connection,
-  Iterable<String> kanji,
+  List<String> kanji,
  bool deduplicate,
 ) async {
  final Set<String> filteredKanji = await connection
@@ -14,7 +14,7 @@ Future<List<String>> filterKanjiWithDbConnection(
      SELECT "literal"
      FROM "${KANJIDICTableNames.character}"
      WHERE "literal" IN (${kanji.map((_) => '?').join(',')})
-    ''', kanji.toList())
+    ''', kanji)
      .then((value) => value.map((e) => e['literal'] as String).toSet());

  if (deduplicate) {
@@ -274,7 +274,7 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
 /// Searches for multiple kanji at once, returning a map of kanji to their search results.
 Future<Map<String, KanjiSearchResult>> searchManyKanjiWithDbConnection(
  DatabaseExecutor connection,
-  Iterable<String> kanji,
+  Set<String> kanji,
 ) async {
  if (kanji.isEmpty) {
    return {};
@@ -229,7 +229,7 @@ CREATE TABLE "JMdict_SenseGlossary" (
  PRIMARY KEY ("senseId", "phrase")
 ) WITHOUT ROWID;

-CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");
+-- CREATE INDEX "JMdict_SenseGlossary_byPhrase" ON JMdict_SenseGlossary("phrase");

 CREATE TABLE "JMdict_SenseGlossaryType" (
  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("senseId"),
@@ -1,6 +1,6 @@
 CREATE TABLE "JMdict_EntryScore" (
  "elementId" INTEGER PRIMARY KEY,
-  "score" INTEGER NOT NULL,
+  "score" INTEGER NOT NULL DEFAULT 0,
  "common" BOOLEAN NOT NULL DEFAULT FALSE,

  "entryId" INTEGER NOT NULL GENERATED ALWAYS AS (("elementId" / 100) % 10000000) STORED,
@@ -15,7 +15,8 @@ CREATE TABLE "JMdict_EntryScore" (
 CREATE INDEX "JMdict_EntryScore_byElementId_byScore" ON "JMdict_EntryScore"("elementId", "score");
 CREATE INDEX "JMdict_EntryScore_byScore" ON "JMdict_EntryScore"("score");

-CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common") WHERE "common" = TRUE;
+CREATE INDEX "JMdict_EntryScore_byElementId_byCommon" ON "JMdict_EntryScore"("elementId", "common");
+CREATE INDEX "JMdict_EntryScore_byCommon" ON "JMdict_EntryScore"("common");

 -- NOTE: these views are deduplicated in order not to perform an unnecessary
 --       UNION on every trigger
@@ -1,20 +0,0 @@
-diff --git i/CMakeLists.txt w/CMakeLists.txt
-index 9d99543..11ce4a4 100644
--- i/CMakeLists.txt
-+++ w/CMakeLists.txt
-@@ -131,6 +131,15 @@ if(NOT SQLite3_FOUND)
-   endif()
- endif()
- 
-+if(SQLite3_FOUND AND NOT TARGET SQLite3::SQLite3)
-+  add_library(SQLite3::SQLite3 UNKNOWN IMPORTED)
-+
-+  set_target_properties(SQLite3::SQLite3 PROPERTIES
-+    IMPORTED_LOCATION "${SQLite3_LIBRARIES}"
-+    INTERFACE_INCLUDE_DIRECTORIES "${SQLite3_INCLUDE_DIRS}"
-+  )
-+endif()
-+
- # --- Configure the Library ---
- 
- # Select source files based on API version
@@ -1,40 +0,0 @@
-{
-  lib,
-  stdenv,
-  fetchFromSourcehut,
-  cmake,
-  pkg-config,
-  icu,
-  sqlite,
-}:
-
-stdenv.mkDerivation (finalAttrs: {
-  pname = "fts5-icu-tokenizer";
-  version = "5.5";
-  src = fetchFromSourcehut {
-    vc = "hg";
-    owner = "~cwt";
-    repo = "fts5-icu-tokenizer";
-    rev = "v${finalAttrs.version}";
-    hash = "sha256-7Klsu9d1sY+W0buo6kwYdCyDA/u2dBTgu6WuttomTBo=";
-  };
-
-  patches = [
-    ./0001-provide-sqlite-externally.patch
-  ];
-
-  nativeBuildInputs = [
-    cmake
-    pkg-config
-  ];
-
-  buildInputs = [
-    icu
-    sqlite
-  ];
-
-  cmakeFlags = [
-    (lib.cmakeFeature "LOCALE" "ja")
-    (lib.cmakeFeature "API_VERSION" "v2")
-  ];
-})
@@ -8,7 +8,7 @@ void main() {
      expect(result, 'かたまり');
    });

-    test('Basic test with dakuten', () {
+    test('Basic test with diacritics', () {
      final result = transliterateLatinToHiragana('gadamari');
      expect(result, 'がだまり');
    });
@@ -54,7 +54,7 @@ void main() {

    test('Basic test', expectSpans('katamari', ['か', 'た', 'ま', 'り']));
    test(
-      'Basic test with dakuten',
+      'Basic test with diacritics',
      expectSpans('gadamari', ['が', 'だ', 'ま', 'り']),
    );
    test('wi and we', expectSpans('wiwe', ['うぃ', 'うぇ']));
@@ -72,7 +72,7 @@ void main() {
      expect(result, 'katamari');
    });

-    test('Basic test with dakuten', () {
+    test('Basic test with diacritics', () {
      final result = transliterateHiraganaToLatin('がだまり');
      expect(result, 'gadamari');
    });
@@ -91,21 +91,6 @@ void main() {
      final result = transliterateHiraganaToLatin('かっぱ');
      expect(result, 'kappa');
    });
-
-    test('Iteration mark', () {
-      final result = transliterateHiraganaToLatin('さゝき');
-      expect(result, 'sasaki');
-    }, skip: 'Not yet implemented');
-
-    test('Iteration mark with dakuten', () {
-      final result = transliterateHiraganaToLatin('あひゞき');
-      expect(result, 'ahibiki');
-    }, skip: 'Not yet implemented');
-
-    test('Yori', () {
-      final result = transliterateHiraganaToLatin('ゟ');
-      expect(result, 'yori');
-    }, skip: 'Not yet implemented');
  });

  group('Hiragana -> Romaji Spans', () {
@@ -125,7 +110,7 @@ void main() {

    test('Basic test', expectSpans('かたまり', ['ka', 'ta', 'ma', 'ri']));
    test(
-      'Basic test with dakuten',
+      'Basic test with diacritics',
      expectSpans('がだまり', ['ga', 'da', 'ma', 'ri']),
    );
    test('wi and we', expectSpans('うぃうぇ', ['whi', 'whe']));
@@ -133,17 +118,5 @@ void main() {

    // TODO: fix the implementation
    // test('Double consonant', expectSpans('かっぱ', ['ka', 'ppa']));
-
-    test(
-      'Iteration mark',
-      expectSpans('さゝき', ['sa', 'sa', 'ki']),
-      skip: 'Not yet implemented',
-    );
-    test(
-      'Iteration mark with dakuten',
-      expectSpans('あひゞき', ['a', 'hi', 'bi', 'ki']),
-      skip: 'Not yet implemented',
-    );
-    test('Yori', expectSpans('ゟ', ['yori']), skip: 'Not yet implemented');
  });
 }