Compare commits
46 Commits
mugiten-v0
...
word-regro
| Author | SHA1 | Date | |
|---|---|---|---|
|
52e9954c71
|
|||
|
0f7854a4fc
|
|||
|
a86f857553
|
|||
|
d14e3909d4
|
|||
|
bb44bf786a
|
|||
|
ad3343a01e
|
|||
|
16d72e94ba
|
|||
|
b070a1fd31
|
|||
|
dcf5c8ebe7
|
|||
|
1f8bc8bac5
|
|||
|
ab28b5788b
|
|||
|
dd7b2917dc
|
|||
|
74798c77b5
|
|||
|
63a4caa626
|
|||
|
374be5ca6b
|
|||
|
4a6fd41f31
|
|||
|
c06fff9e5a
|
|||
|
1d9928ade1
|
|||
|
1a3b04be00
|
|||
|
c0c6f97a01
|
|||
|
a954188d5d
|
|||
|
5b86d6eb67
|
|||
|
72f31e974b
|
|||
|
e824dc0a22
|
|||
|
f5bca61839
|
|||
|
056aaaa0ce
|
|||
|
a696ed9733
|
|||
|
00b963bfed
|
|||
|
4376012f18
|
|||
|
8ae1d882a0
|
|||
|
81db60ccf7
|
|||
|
f57cc68ef3
|
|||
|
48f50628a1
|
|||
|
1783338b2a
|
|||
|
e92e99922b
|
|||
|
05b56466e7
|
|||
|
33016ca751
|
|||
|
98d92d370d
|
|||
|
5252936bdc
|
|||
|
ac0cb14bbe
|
|||
|
49a86f60ea
|
|||
|
9472156feb
|
|||
|
4fbdba604e
|
|||
|
0cdfa2015e
|
|||
|
a9ca9b08a5
|
|||
|
45e8181041
|
71
.gitea/workflows/build-and-test.yml
Normal file
71
.gitea/workflows/build-and-test.yml
Normal file
@@ -0,0 +1,71 @@
|
||||
# CI workflow: builds the jadb SQLite database with Nix, uploads it as an
# artifact, runs the Dart test suite with coverage, and publishes the HTML
# coverage report over rsync.
name: "Build and test"
on:
  workflow_dispatch:
  pull_request:
  push:
jobs:
  evals:
    runs-on: debian-latest
    steps:
      - uses: actions/checkout@v6

      - name: Install sudo
        run: apt-get update && apt-get -y install sudo

      - name: Install nix
        uses: https://github.com/cachix/install-nix-action@v31
        with:
          # Each setting is listed exactly once; `experimental-features` was
          # previously repeated with an identical value.
          extra_nix_config: |
            experimental-features = nix-command flakes
            show-trace = true
            max-jobs = auto
            trusted-users = root
            build-users-group =

      # Refresh the lock entries of the dictionary source inputs before building.
      - name: Update database inputs
        run: |
          nix flake update jmdict-src
          nix flake update jmdict-with-examples-src
          nix flake update radkfile-src
          nix flake update kanjidic2-src

      - name: Build database
        run: nix build .#database -L

      - name: Upload database as artifact
        uses: actions/upload-artifact@v3
        with:
          name: jadb-${{ gitea.sha }}.zip
          path: result/jadb.sqlite
          if-no-files-found: error
          retention-days: 15
          # Already compressed
          compression: 0

      - name: Print database statistics
        run: nix develop .# --command sqlite3_analyzer result/jadb.sqlite

      # TODO: Defer failure of tests until after the coverage report is generated and uploaded.
      - name: Run tests
        run: nix develop .# --command dart run test --concurrency=1 --coverage-path=coverage/lcov.info

      - name: Generate coverage report
        run: |
          GENHTML_ARGS=(
            --current-date="$(date)"
            --dark-mode
            --output-directory coverage/report
          )

          nix develop .# --command genhtml "${GENHTML_ARGS[@]}" coverage/lcov.info

      # Publishes ./coverage under a per-ref directory on the web host.
      - name: Upload coverage report
        uses: https://git.pvv.ntnu.no/Projects/rsync-action@v2
        with:
          source: ./coverage
          target: jadb/${{ gitea.ref_name }}/
          username: oysteikt
          ssh-key: ${{ secrets.OYSTEIKT_GITEA_WEBDOCS_SSH_KEY }}
          host: microbel.pvv.ntnu.no
          known-hosts: "microbel.pvv.ntnu.no ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEq0yasKP0mH6PI6ypmuzPzMnbHELo9k+YB5yW534aKudKZS65YsHJKQ9vapOtmegrn5MQbCCgrshf+/XwZcjbM="
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -8,6 +8,7 @@
|
||||
# Conventional directory for build output.
|
||||
/doc/
|
||||
/build/
|
||||
/coverage/
|
||||
main.db
|
||||
|
||||
# Nix
|
||||
|
||||
29
README.md
29
README.md
@@ -1,7 +1,9 @@
|
||||
# jadb
|
||||
|
||||
[![built with nix](https://builtwithnix.org/badge.svg)](https://builtwithnix.org)
|
||||
|
||||
[Latest coverage report](https://www.pvv.ntnu.no/~oysteikt/gitea/jadb/main/coverage/report/)
|
||||
|
||||
# jadb
|
||||
|
||||
An SQLite database containing open source Japanese dictionary data combined from several sources
|
||||
|
||||
Note that while the license for the code is MIT, the data has various licenses.
|
||||
@@ -16,3 +18,26 @@ Note that while the license for the code is MIT, the data has various licenses.
|
||||
| **Tanos JLPT levels:** | https://www.tanos.co.uk/jlpt/ |
|
||||
| **Kangxi Radicals:** | https://ctext.org/kangxi-zidian |
|
||||
|
||||
## Implementation details
|
||||
|
||||
### Word search
|
||||
|
||||
The word search procedure is currently split into 3 parts:
|
||||
|
||||
1. **Entry ID query**:
|
||||
|
||||
Use a complex query with various scoring factors to try to get a list of
|
||||
database ids pointing at dictionary entries, sorted by how likely we think this
|
||||
word is the word that the caller is looking for. The output here is a `List<int>`
|
||||
|
||||
2. **Data Query**:
|
||||
|
||||
Takes the entry id list from the last search, and performs all queries needed to retrieve
|
||||
all the dictionary data for those IDs. The result is a struct with a bunch of flattened lists
|
||||
with data for all the dictionary entries. These lists are sorted by the order that the ids
|
||||
were provided.
|
||||
|
||||
3. **Regrouping**:
|
||||
|
||||
Takes the flattened data, and regroups the items into structs with a more "hierarchical" structure.
|
||||
All data tagged with the same ID will end up in the same struct. Returns a list of these structs.
|
||||
|
||||
18
flake.lock
generated
18
flake.lock
generated
@@ -3,7 +3,7 @@
|
||||
"jmdict-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"narHash": "sha256-5Y4ySJadyNF/Ckjv9rEjIpLnoN0YpbN+cvOawqiuo5Y=",
|
||||
"narHash": "sha256-lh46uougUzBrRhhwa7cOb32j5Jt9/RjBUhlVjwVzsII=",
|
||||
"type": "file",
|
||||
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
|
||||
},
|
||||
@@ -15,7 +15,7 @@
|
||||
"jmdict-with-examples-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"narHash": "sha256-/lOum1C/0zuq9W+g/TajsOgkTeai8vW4ubUdfX8ahX0=",
|
||||
"narHash": "sha256-5oS2xDyetbuSM6ax3LUjYA3N60x+D3Hg41HEXGFMqLQ=",
|
||||
"type": "file",
|
||||
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
|
||||
},
|
||||
@@ -27,7 +27,7 @@
|
||||
"kanjidic2-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"narHash": "sha256-2RCsAsosBjMAgTzmd8YLa5qP9HIVy6wP4DoMNy1LCKM=",
|
||||
"narHash": "sha256-orSeQqSxhn9TtX3anYtbiMEm7nFkuomGnIKoVIUR2CM=",
|
||||
"type": "file",
|
||||
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
|
||||
},
|
||||
@@ -38,11 +38,11 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1752480373,
|
||||
"narHash": "sha256-JHQbm+OcGp32wAsXTE/FLYGNpb+4GLi5oTvCxwSoBOA=",
|
||||
"lastModified": 1771848320,
|
||||
"narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "62e0f05ede1da0d54515d4ea8ce9c733f12d9f08",
|
||||
"rev": "2fc6539b481e1d2569f25f8799236694180c0993",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -54,13 +54,13 @@
|
||||
"radkfile-src": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=",
|
||||
"narHash": "sha256-DHpMUE2Umje8PbzXUCS6pHZeXQ5+WTxbjSkGU3erDHQ=",
|
||||
"type": "file",
|
||||
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
|
||||
"url": "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz"
|
||||
},
|
||||
"original": {
|
||||
"type": "file",
|
||||
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
|
||||
"url": "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
|
||||
14
flake.nix
14
flake.nix
@@ -16,7 +16,7 @@
|
||||
};
|
||||
|
||||
radkfile-src = {
|
||||
url = "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz";
|
||||
url = "http://ftp.edrdg.org/pub/Nihongo/radkfile.gz";
|
||||
flake = false;
|
||||
};
|
||||
|
||||
@@ -80,15 +80,17 @@
|
||||
buildInputs = with pkgs; [
|
||||
dart
|
||||
gnumake
|
||||
sqlite-interactive
|
||||
lcov
|
||||
sqlite-analyzer
|
||||
sqlite-interactive
|
||||
sqlite-web
|
||||
sqlint
|
||||
# sqlint
|
||||
sqlfluff
|
||||
];
|
||||
env = {
|
||||
LIBSQLITE_PATH = "${pkgs.sqlite.out}/lib/libsqlite3.so";
|
||||
JADB_PATH = "result/jadb.sqlite";
|
||||
LD_LIBRARY_PATH = lib.makeLibraryPath [ pkgs.sqlite ];
|
||||
};
|
||||
};
|
||||
});
|
||||
@@ -109,10 +111,14 @@
|
||||
in !(lib.any (b: b) [
|
||||
(!(lib.cleanSourceFilter path type))
|
||||
(baseName == ".github" && type == "directory")
|
||||
(baseName == ".gitea" && type == "directory")
|
||||
|
||||
(baseName == "nix" && type == "directory")
|
||||
(baseName == ".envrc" && type == "regular")
|
||||
(baseName == "flake.lock" && type == "regular")
|
||||
(baseName == "flake.nix" && type == "regular")
|
||||
|
||||
(baseName == ".sqlfluff" && type == "regular")
|
||||
])) ./.;
|
||||
|
||||
in forAllSystems (system: pkgs: {
|
||||
@@ -123,7 +129,7 @@
|
||||
'';
|
||||
|
||||
jmdict = pkgs.callPackage ./nix/jmdict.nix {
|
||||
inherit jmdict-src jmdict-with-examples-src edrdgMetadata;
|
||||
inherit jmdict-src jmdict-with-examples-src edrdgMetadata;
|
||||
};
|
||||
|
||||
radkfile = pkgs.callPackage ./nix/radkfile.nix {
|
||||
|
||||
@@ -10,14 +10,15 @@ List<int?> getPriorityValues(XmlElement e, String prefix) {
|
||||
final txt = pri.innerText;
|
||||
if (txt.startsWith('news')) {
|
||||
news = int.parse(txt.substring(4));
|
||||
} else if (txt.startsWith('ichi'))
|
||||
} else if (txt.startsWith('ichi')) {
|
||||
ichi = int.parse(txt.substring(4));
|
||||
else if (txt.startsWith('spec'))
|
||||
} else if (txt.startsWith('spec')) {
|
||||
spec = int.parse(txt.substring(4));
|
||||
else if (txt.startsWith('gai'))
|
||||
} else if (txt.startsWith('gai')) {
|
||||
gai = int.parse(txt.substring(3));
|
||||
else if (txt.startsWith('nf'))
|
||||
} else if (txt.startsWith('nf')) {
|
||||
nf = int.parse(txt.substring(2));
|
||||
}
|
||||
}
|
||||
return [news, ichi, spec, gai, nf];
|
||||
}
|
||||
@@ -79,16 +80,16 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
final List<ReadingElement> readingEls = [];
|
||||
final List<Sense> senses = [];
|
||||
|
||||
for (final (kanjiNum, k_ele) in entry.findElements('k_ele').indexed) {
|
||||
final kePri = getPriorityValues(k_ele, 'ke');
|
||||
for (final (kanjiNum, kEle) in entry.findElements('k_ele').indexed) {
|
||||
final kePri = getPriorityValues(kEle, 'ke');
|
||||
kanjiEls.add(
|
||||
KanjiElement(
|
||||
orderNum: kanjiNum + 1,
|
||||
info: k_ele
|
||||
info: kEle
|
||||
.findElements('ke_inf')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
reading: k_ele.findElements('keb').first.innerText,
|
||||
reading: kEle.findElements('keb').first.innerText,
|
||||
news: kePri[0],
|
||||
ichi: kePri[1],
|
||||
spec: kePri[2],
|
||||
@@ -98,24 +99,24 @@ List<Entry> parseJMDictData(XmlElement root) {
|
||||
);
|
||||
}
|
||||
|
||||
for (final (orderNum, r_ele) in entry.findElements('r_ele').indexed) {
|
||||
final rePri = getPriorityValues(r_ele, 're');
|
||||
final readingDoesNotMatchKanji = r_ele
|
||||
for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) {
|
||||
final rePri = getPriorityValues(rEle, 're');
|
||||
final readingDoesNotMatchKanji = rEle
|
||||
.findElements('re_nokanji')
|
||||
.isNotEmpty;
|
||||
readingEls.add(
|
||||
ReadingElement(
|
||||
orderNum: orderNum + 1,
|
||||
readingDoesNotMatchKanji: readingDoesNotMatchKanji,
|
||||
info: r_ele
|
||||
info: rEle
|
||||
.findElements('re_inf')
|
||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||
.toList(),
|
||||
restrictions: r_ele
|
||||
restrictions: rEle
|
||||
.findElements('re_restr')
|
||||
.map((e) => e.innerText)
|
||||
.toList(),
|
||||
reading: r_ele.findElements('reb').first.innerText,
|
||||
reading: rEle.findElements('reb').first.innerText,
|
||||
news: rePri[0],
|
||||
ichi: rePri[1],
|
||||
spec: rePri[2],
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
import 'dart:ffi';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
|
||||
import 'package:sqlite3/open.dart';
|
||||
|
||||
Future<Database> openLocalDb({
|
||||
String? libsqlitePath,
|
||||
@@ -12,43 +10,27 @@ Future<Database> openLocalDb({
|
||||
bool verifyTablesExist = true,
|
||||
bool walMode = false,
|
||||
}) async {
|
||||
libsqlitePath ??= Platform.environment['LIBSQLITE_PATH'];
|
||||
jadbPath ??= Platform.environment['JADB_PATH'];
|
||||
jadbPath ??= Directory.current.uri.resolve('jadb.sqlite').path;
|
||||
|
||||
libsqlitePath = (libsqlitePath == null)
|
||||
? null
|
||||
: File(libsqlitePath).resolveSymbolicLinksSync();
|
||||
jadbPath = File(jadbPath).resolveSymbolicLinksSync();
|
||||
|
||||
if (libsqlitePath == null) {
|
||||
throw Exception('LIBSQLITE_PATH is not set');
|
||||
}
|
||||
|
||||
if (!File(libsqlitePath).existsSync()) {
|
||||
throw Exception('LIBSQLITE_PATH does not exist: $libsqlitePath');
|
||||
}
|
||||
|
||||
if (!File(jadbPath).existsSync()) {
|
||||
throw Exception('JADB_PATH does not exist: $jadbPath');
|
||||
}
|
||||
|
||||
final db =
|
||||
await createDatabaseFactoryFfi(
|
||||
ffiInit: () =>
|
||||
open.overrideForAll(() => DynamicLibrary.open(libsqlitePath!)),
|
||||
).openDatabase(
|
||||
jadbPath,
|
||||
options: OpenDatabaseOptions(
|
||||
onConfigure: (db) async {
|
||||
if (walMode) {
|
||||
await db.execute('PRAGMA journal_mode = WAL');
|
||||
}
|
||||
await db.execute('PRAGMA foreign_keys = ON');
|
||||
},
|
||||
readOnly: !readWrite,
|
||||
),
|
||||
);
|
||||
final db = await createDatabaseFactoryFfi().openDatabase(
|
||||
jadbPath,
|
||||
options: OpenDatabaseOptions(
|
||||
onConfigure: (db) async {
|
||||
if (walMode) {
|
||||
await db.execute('PRAGMA journal_mode = WAL');
|
||||
}
|
||||
await db.execute('PRAGMA foreign_keys = ON');
|
||||
},
|
||||
readOnly: !readWrite,
|
||||
),
|
||||
);
|
||||
|
||||
if (verifyTablesExist) {
|
||||
await db.jadbVerifyTables();
|
||||
|
||||
@@ -3,12 +3,20 @@ import 'dart:io';
|
||||
|
||||
import 'package:csv/csv.dart';
|
||||
import 'package:jadb/_data_ingestion/tanos-jlpt/objects.dart';
|
||||
import 'package:xml/xml_events.dart';
|
||||
|
||||
Future<List<JLPTRankedWord>> parseJLPTRankedWords(
|
||||
Map<String, File> files,
|
||||
) async {
|
||||
final List<JLPTRankedWord> result = [];
|
||||
|
||||
final codec = CsvCodec(
|
||||
fieldDelimiter: ',',
|
||||
lineDelimiter: '\n',
|
||||
quoteMode: QuoteMode.strings,
|
||||
escapeCharacter: '\\',
|
||||
);
|
||||
|
||||
for (final entry in files.entries) {
|
||||
final jlptLevel = entry.key;
|
||||
final file = entry.value;
|
||||
@@ -17,42 +25,42 @@ Future<List<JLPTRankedWord>> parseJLPTRankedWords(
|
||||
throw Exception('File $jlptLevel does not exist');
|
||||
}
|
||||
|
||||
final rows = await file
|
||||
final words = await file
|
||||
.openRead()
|
||||
.transform(utf8.decoder)
|
||||
.transform(CsvToListConverter())
|
||||
.transform(codec.decoder)
|
||||
.flatten()
|
||||
.map((row) {
|
||||
if (row.length != 3) {
|
||||
throw Exception('Invalid line in $jlptLevel: $row');
|
||||
}
|
||||
return row;
|
||||
})
|
||||
.map((row) => row.map((e) => e as String).toList())
|
||||
.map((row) {
|
||||
final kanji = row[0].isEmpty
|
||||
? null
|
||||
: row[0]
|
||||
.replaceFirst(RegExp('^お・'), '')
|
||||
.replaceAll(RegExp(r'(.*)'), '');
|
||||
|
||||
final readings = row[1]
|
||||
.split(RegExp('[・/、(:?s+)]'))
|
||||
.map((e) => e.trim())
|
||||
.toList();
|
||||
|
||||
final meanings = row[2].split(',').expand(cleanMeaning).toList();
|
||||
|
||||
return JLPTRankedWord(
|
||||
readings: readings,
|
||||
kanji: kanji,
|
||||
jlptLevel: jlptLevel,
|
||||
meanings: meanings,
|
||||
);
|
||||
})
|
||||
.toList();
|
||||
|
||||
for (final row in rows) {
|
||||
if (row.length != 3) {
|
||||
throw Exception('Invalid line in $jlptLevel: $row');
|
||||
}
|
||||
|
||||
final kanji = (row[0] as String).isEmpty
|
||||
? null
|
||||
: (row[0] as String)
|
||||
.replaceFirst(RegExp('^お・'), '')
|
||||
.replaceAll(RegExp(r'(.*)'), '');
|
||||
|
||||
final readings = (row[1] as String)
|
||||
.split(RegExp('[・/、(:?s+)]'))
|
||||
.map((e) => e.trim())
|
||||
.toList();
|
||||
|
||||
final meanings = (row[2] as String)
|
||||
.split(',')
|
||||
.expand(cleanMeaning)
|
||||
.toList();
|
||||
|
||||
result.add(
|
||||
JLPTRankedWord(
|
||||
readings: readings,
|
||||
kanji: kanji,
|
||||
jlptLevel: jlptLevel,
|
||||
meanings: meanings,
|
||||
),
|
||||
);
|
||||
}
|
||||
result.addAll(words);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const Map<(String?, String), int?> TANOS_JLPT_OVERRIDES = {
|
||||
const Map<(String?, String), int?> tanosJLPTOverrides = {
|
||||
// N5:
|
||||
(null, 'あなた'): 1223615,
|
||||
(null, 'あの'): 1000430,
|
||||
|
||||
@@ -76,7 +76,7 @@ Future<int?> findEntry(
|
||||
if ((entryIds.isEmpty || entryIds.length > 1) && useOverrides) {
|
||||
print('No entry found, trying to fetch from overrides');
|
||||
final overrideEntries = word.readings
|
||||
.map((reading) => TANOS_JLPT_OVERRIDES[(word.kanji, reading)])
|
||||
.map((reading) => tanosJLPTOverrides[(word.kanji, reading)])
|
||||
.whereType<int>()
|
||||
.toSet();
|
||||
|
||||
@@ -86,7 +86,7 @@ Future<int?> findEntry(
|
||||
);
|
||||
} else if (overrideEntries.isEmpty &&
|
||||
!word.readings.any(
|
||||
(reading) => TANOS_JLPT_OVERRIDES.containsKey((word.kanji, reading)),
|
||||
(reading) => tanosJLPTOverrides.containsKey((word.kanji, reading)),
|
||||
)) {
|
||||
throw Exception(
|
||||
'No override entry found for ${word.toString()}: $entryIds',
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/// Jouyou kanji sorted primarily by grades and secondarily by strokes.
|
||||
const Map<int, Map<int, List<String>>>
|
||||
JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT = {
|
||||
const Map<int, Map<int, List<String>>> jouyouKanjiByGradeAndStrokeCount = {
|
||||
1: {
|
||||
1: ['一'],
|
||||
2: ['力', '八', '入', '二', '人', '十', '七', '九'],
|
||||
@@ -1861,8 +1860,8 @@ JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT = {
|
||||
},
|
||||
};
|
||||
|
||||
final Map<int, List<String>> JOUYOU_KANJI_BY_GRADES =
|
||||
JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT.entries
|
||||
final Map<int, List<String>> jouyouKanjiByGrades =
|
||||
jouyouKanjiByGradeAndStrokeCount.entries
|
||||
.expand((entry) => entry.value.entries)
|
||||
.map((entry) => MapEntry(entry.key, entry.value))
|
||||
.fold<Map<int, List<String>>>(
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const Map<int, List<String>> RADICALS = {
|
||||
const Map<int, List<String>> radicals = {
|
||||
1: ['一', '|', '丶', 'ノ', '乙', '亅'],
|
||||
2: [
|
||||
'二',
|
||||
|
||||
62
lib/models/word_search/word_search_match_span.dart
Normal file
62
lib/models/word_search/word_search_match_span.dart
Normal file
@@ -0,0 +1,62 @@
|
||||
/// The part of a word search result that a [WordSearchMatchSpan] points into.
enum WordSearchMatchSpanType { kanji, kana, sense }

/// A span of a word search result that corresponds to a match for a kanji, kana, or sense.
///
/// Two spans are equal only when every field, including [subIndex], is equal.
class WordSearchMatchSpan {
  /// Which subtype of the word search result this span corresponds to - either a kanji, a kana, or a sense.
  final WordSearchMatchSpanType spanType;

  /// The index of the kanji/kana/sense in the word search result that this span corresponds to.
  final int index;

  /// When matching a 'sense', this is the index of the English definition in that sense that this span corresponds to. Otherwise, this is always 0.
  final int subIndex;

  /// The start of the span (inclusive)
  final int start;

  /// The end of the span (exclusive).
  ///
  /// NOTE(review): this was previously documented as inclusive, but the only
  /// producer visible alongside this class (`WordSearchResult.inferMatchSpans`)
  /// stores `Match.end`, which is the index just past the last matched
  /// character. Confirm against consumers before relying on either reading.
  final int end;

  WordSearchMatchSpan({
    required this.spanType,
    required this.index,
    required this.start,
    required this.end,
    this.subIndex = 0,
  });

  @override
  String toString() {
    return 'WordSearchMatchSpan(spanType: $spanType, index: $index, subIndex: $subIndex, start: $start, end: $end)';
  }

  /// Serializes the span. `spanType` is written as the bare enum value name.
  Map<String, Object?> toJson() => {
    'spanType': spanType.toString().split('.').last,
    'index': index,
    'subIndex': subIndex,
    'start': start,
    'end': end,
  };

  /// Rebuilds a span from the output of [toJson].
  ///
  /// A missing `subIndex` key is read as 0, so JSON written before that key
  /// was emitted still parses. Throws a [StateError] if `spanType` does not
  /// name a [WordSearchMatchSpanType] value.
  factory WordSearchMatchSpan.fromJson(Map<String, dynamic> json) =>
      WordSearchMatchSpan(
        spanType: WordSearchMatchSpanType.values.firstWhere(
          (e) => e.toString().split('.').last == json['spanType'],
        ),
        index: json['index'] as int,
        subIndex: (json['subIndex'] as int?) ?? 0,
        start: json['start'] as int,
        end: json['end'] as int,
      );

  @override
  int get hashCode => Object.hash(spanType, index, subIndex, start, end);

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;
    return other is WordSearchMatchSpan &&
        other.spanType == spanType &&
        other.index == index &&
        other.subIndex == subIndex &&
        other.start == start &&
        other.end == end;
  }
}
|
||||
@@ -1,9 +1,12 @@
|
||||
import 'package:jadb/models/common/jlpt_level.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_kanji_info.dart';
|
||||
import 'package:jadb/models/jmdict/jmdict_reading_info.dart';
|
||||
import 'package:jadb/models/word_search/word_search_match_span.dart';
|
||||
import 'package:jadb/models/word_search/word_search_ruby.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
import 'package:jadb/search/word_search/word_search.dart';
|
||||
import 'package:jadb/util/romaji_transliteration.dart';
|
||||
|
||||
/// A class representing a single dictionary entry from a word search.
|
||||
class WordSearchResult {
|
||||
@@ -34,7 +37,44 @@ class WordSearchResult {
|
||||
/// A class listing the sources used to make up the data for this word search result.
|
||||
final WordSearchSources sources;
|
||||
|
||||
const WordSearchResult({
|
||||
/// A list of spans, specifying which part of this word result matched the search keyword.
|
||||
///
|
||||
/// Note that this is considered ephemeral data - it does not originate from the dictionary,
|
||||
/// and unlike the rest of the class it varies based on external information (the searchword).
|
||||
/// It will *NOT* be exported to JSON, but can be reinferred by invoking [inferMatchSpans] with
|
||||
/// the original searchword.
|
||||
List<WordSearchMatchSpan>? matchSpans;
|
||||
|
||||
/// All contents of [japanese], transliterated to romaji
|
||||
List<String> get romaji => japanese
|
||||
.map((word) => transliterateKanaToLatin(word.furigana ?? word.base))
|
||||
.toList();
|
||||
|
||||
/// All contents of [japanese], where the furigana has either been transliterated to romaji, or
|
||||
/// contains the furigana transliteration of [WordSearchRuby.base].
|
||||
List<WordSearchRuby> get romajiRubys => japanese
|
||||
.map(
|
||||
(word) => WordSearchRuby(
|
||||
base: word.base,
|
||||
furigana: word.furigana != null
|
||||
? transliterateKanaToLatin(word.furigana!)
|
||||
: transliterateKanaToLatin(word.base),
|
||||
),
|
||||
)
|
||||
.toList();
|
||||
|
||||
/// The same list of spans as [matchSpans], but the positions have been adjusted for romaji conversion
|
||||
///
|
||||
/// This is mostly useful in conjunction with [romajiRubys].
|
||||
List<WordSearchMatchSpan>? get romajiMatchSpans {
|
||||
if (matchSpans == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
throw UnimplementedError('Not yet implemented');
|
||||
}
|
||||
|
||||
WordSearchResult({
|
||||
required this.score,
|
||||
required this.entryId,
|
||||
required this.isCommon,
|
||||
@@ -44,6 +84,7 @@ class WordSearchResult {
|
||||
required this.senses,
|
||||
required this.jlptLevel,
|
||||
required this.sources,
|
||||
this.matchSpans,
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
@@ -81,7 +122,78 @@ class WordSearchResult {
|
||||
sources: WordSearchSources.fromJson(json['sources']),
|
||||
);
|
||||
|
||||
String _formatJapaneseWord(WordSearchRuby word) =>
|
||||
factory WordSearchResult.empty() => WordSearchResult(
|
||||
score: 0,
|
||||
entryId: 0,
|
||||
isCommon: false,
|
||||
japanese: [],
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
senses: [],
|
||||
jlptLevel: JlptLevel.none,
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
/// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
///
/// [searchword] is matched literally (it is regex-escaped) and
/// case-sensitively against every `base` and `furigana` in [japanese] and
/// every English definition in [senses]. Any previous value of [matchSpans]
/// is overwritten.
///
/// An empty [searchword] yields an empty span list: an empty pattern would
/// otherwise match at every position and produce only zero-length spans.
///
/// [searchMode] is accepted but not used yet (see the TODO below).
void inferMatchSpans(
  String searchword, {
  SearchMode searchMode = SearchMode.auto,
}) {
  // TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
  // TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.

  if (searchword.isEmpty) {
    matchSpans = <WordSearchMatchSpan>[];
    return;
  }

  final regex = RegExp(RegExp.escape(searchword));
  final spans = <WordSearchMatchSpan>[];

  for (final (i, word) in japanese.indexed) {
    // Matches in `base` are always tagged as kanji, even for words that
    // carry no furigana.
    spans.addAll(
      regex
          .allMatches(word.base)
          .map(
            (match) => WordSearchMatchSpan(
              spanType: WordSearchMatchSpanType.kanji,
              index: i,
              start: match.start,
              end: match.end,
            ),
          ),
    );

    final furigana = word.furigana;
    if (furigana != null) {
      spans.addAll(
        regex
            .allMatches(furigana)
            .map(
              (match) => WordSearchMatchSpan(
                spanType: WordSearchMatchSpanType.kana,
                index: i,
                start: match.start,
                end: match.end,
              ),
            ),
      );
    }
  }

  for (final (i, sense) in senses.indexed) {
    for (final (k, definition) in sense.englishDefinitions.indexed) {
      // `subIndex` records which definition inside the sense matched.
      spans.addAll(
        regex
            .allMatches(definition)
            .map(
              (match) => WordSearchMatchSpan(
                spanType: WordSearchMatchSpanType.sense,
                index: i,
                subIndex: k,
                start: match.start,
                end: match.end,
              ),
            ),
      );
    }
  }

  matchSpans = spans;
}
|
||||
|
||||
static String _formatJapaneseWord(WordSearchRuby word) =>
|
||||
word.furigana == null ? word.base : '${word.base} (${word.furigana})';
|
||||
|
||||
@override
|
||||
|
||||
@@ -9,6 +9,8 @@ class WordSearchSources {
|
||||
|
||||
const WordSearchSources({this.jmdict = true, this.jmnedict = false});
|
||||
|
||||
factory WordSearchSources.empty() => const WordSearchSources();
|
||||
|
||||
Map<String, Object?> get sqlValue => {'jmdict': jmdict, 'jmnedict': jmnedict};
|
||||
|
||||
Map<String, dynamic> toJson() => {'jmdict': jmdict, 'jmnedict': jmnedict};
|
||||
|
||||
@@ -30,7 +30,7 @@ extension JaDBConnection on DatabaseExecutor {
|
||||
/// Search for a word in the database.
|
||||
Future<List<WordSearchResult>?> jadbSearchWord(
|
||||
String word, {
|
||||
SearchMode searchMode = SearchMode.Auto,
|
||||
SearchMode searchMode = SearchMode.auto,
|
||||
int page = 0,
|
||||
int? pageSize,
|
||||
}) => searchWordWithDbConnection(
|
||||
@@ -54,7 +54,7 @@ extension JaDBConnection on DatabaseExecutor {
|
||||
/// Search for a word in the database, and return the count of results.
|
||||
Future<int?> jadbSearchWordCount(
|
||||
String word, {
|
||||
SearchMode searchMode = SearchMode.Auto,
|
||||
SearchMode searchMode = SearchMode.auto,
|
||||
}) => searchWordCountWithDbConnection(this, word, searchMode: searchMode);
|
||||
|
||||
/// Given a list of radicals, search which kanji contains all
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
/// Filters a list of kanji characters, returning only those that exist in the database.
|
||||
///
|
||||
/// If [deduplicate] is true, the returned list will deduplicate the input kanji list before returning the filtered results.
|
||||
Future<List<String>> filterKanjiWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
List<String> kanji,
|
||||
@@ -15,7 +18,15 @@ Future<List<String>> filterKanjiWithDbConnection(
|
||||
.then((value) => value.map((e) => e['literal'] as String).toSet());
|
||||
|
||||
if (deduplicate) {
|
||||
return filteredKanji.toList();
|
||||
final List<String> result = [];
|
||||
final Set<String> seen = {};
|
||||
for (final k in kanji) {
|
||||
if (filteredKanji.contains(k) && !seen.contains(k)) {
|
||||
result.add(k);
|
||||
seen.add(k);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
return kanji.where((k) => filteredKanji.contains(k)).toList();
|
||||
}
|
||||
|
||||
@@ -3,142 +3,188 @@ import 'package:jadb/models/kanji_search/kanji_search_radical.dart';
|
||||
import 'package:jadb/models/kanji_search/kanji_search_result.dart';
|
||||
import 'package:jadb/table_names/kanjidic.dart';
|
||||
import 'package:jadb/table_names/radkfile.dart';
|
||||
import 'package:jadb/util/romaji_transliteration.dart';
|
||||
import 'package:sqflite_common/sqflite.dart';
|
||||
|
||||
Future<List<Map<String, Object?>>> _charactersQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.character,
|
||||
where: 'literal = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _codepointsQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.codepoint,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _kunyomisQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.kunyomi,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _onyomisQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.onyomi,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _meaningsQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.meaning,
|
||||
where: 'kanji = ? AND language = ?',
|
||||
whereArgs: [kanji, 'eng'],
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _nanorisQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.nanori,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _dictionaryReferencesQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.dictionaryReference,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _queryCodesQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.queryCode,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _radicalsQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.rawQuery(
|
||||
'''
|
||||
SELECT DISTINCT
|
||||
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
|
||||
"names"
|
||||
FROM "${KANJIDICTableNames.radical}"
|
||||
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
|
||||
LEFT JOIN (
|
||||
SELECT "radicalId", group_concat("name") AS "names"
|
||||
FROM "${KANJIDICTableNames.radicalName}"
|
||||
GROUP BY "radicalId"
|
||||
) USING ("radicalId")
|
||||
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
|
||||
''',
|
||||
[kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _partsQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
RADKFILETableNames.radkfile,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _readingsQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.reading,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
Future<List<Map<String, Object?>>> _strokeMiscountsQuery(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) => connection.query(
|
||||
KANJIDICTableNames.strokeMiscount,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
// Future<List<Map<String, Object?>>> _variantsQuery(
|
||||
// DatabaseExecutor connection,
|
||||
// String kanji,
|
||||
// ) => connection.query(
|
||||
// KANJIDICTableNames.variant,
|
||||
// where: 'kanji = ?',
|
||||
// whereArgs: [kanji],
|
||||
// );
|
||||
|
||||
/// Searches for a kanji character and returns its details, or null if the kanji is not found in the database.
|
||||
Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
String kanji,
|
||||
) async {
|
||||
late final List<Map<String, Object?>> characters;
|
||||
final charactersQuery = connection.query(
|
||||
KANJIDICTableNames.character,
|
||||
where: 'literal = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> codepoints;
|
||||
final codepointsQuery = connection.query(
|
||||
KANJIDICTableNames.codepoint,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> kunyomis;
|
||||
final kunyomisQuery = connection.query(
|
||||
KANJIDICTableNames.kunyomi,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> onyomis;
|
||||
final onyomisQuery = connection.query(
|
||||
KANJIDICTableNames.onyomi,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> meanings;
|
||||
final meaningsQuery = connection.query(
|
||||
KANJIDICTableNames.meaning,
|
||||
where: 'kanji = ? AND language = ?',
|
||||
whereArgs: [kanji, 'eng'],
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> nanoris;
|
||||
final nanorisQuery = connection.query(
|
||||
KANJIDICTableNames.nanori,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> dictionaryReferences;
|
||||
final dictionaryReferencesQuery = connection.query(
|
||||
KANJIDICTableNames.dictionaryReference,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> queryCodes;
|
||||
final queryCodesQuery = connection.query(
|
||||
KANJIDICTableNames.queryCode,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> radicals;
|
||||
final radicalsQuery = connection.rawQuery(
|
||||
'''
|
||||
SELECT DISTINCT
|
||||
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
|
||||
"names"
|
||||
FROM "${KANJIDICTableNames.radical}"
|
||||
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
|
||||
LEFT JOIN (
|
||||
SELECT "radicalId", group_concat("name") AS "names"
|
||||
FROM "${KANJIDICTableNames.radicalName}"
|
||||
GROUP BY "radicalId"
|
||||
) USING ("radicalId")
|
||||
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
|
||||
''',
|
||||
[kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> parts;
|
||||
final partsQuery = connection.query(
|
||||
RADKFILETableNames.radkfile,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> readings;
|
||||
final readingsQuery = connection.query(
|
||||
KANJIDICTableNames.reading,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> strokeMiscounts;
|
||||
final strokeMiscountsQuery = connection.query(
|
||||
KANJIDICTableNames.strokeMiscount,
|
||||
where: 'kanji = ?',
|
||||
whereArgs: [kanji],
|
||||
);
|
||||
|
||||
// TODO: add variant data to result
|
||||
// late final List<Map<String, Object?>> variants;
|
||||
// final variants_query = connection.query(
|
||||
// KANJIDICTableNames.variant,
|
||||
// where: "kanji = ?",
|
||||
// whereArgs: [kanji],
|
||||
// );
|
||||
|
||||
// TODO: Search for kunyomi and onyomi usage of the characters
|
||||
// from JMDict. We'll need to fuzzy aquery JMDict_KanjiElement for mathces,
|
||||
// from JMDict. We'll need to fuzzy query JMDict_KanjiElement for matches,
|
||||
// filter JMdict_ReadingElement for kunyomi/onyomi, and then sort the main entry
|
||||
// by JLPT, news frequency, etc.
|
||||
|
||||
await charactersQuery.then((value) => characters = value);
|
||||
await _charactersQuery(connection, kanji).then((value) => characters = value);
|
||||
|
||||
if (characters.isEmpty) {
|
||||
return null;
|
||||
}
|
||||
|
||||
await Future.wait({
|
||||
codepointsQuery.then((value) => codepoints = value),
|
||||
kunyomisQuery.then((value) => kunyomis = value),
|
||||
onyomisQuery.then((value) => onyomis = value),
|
||||
meaningsQuery.then((value) => meanings = value),
|
||||
nanorisQuery.then((value) => nanoris = value),
|
||||
dictionaryReferencesQuery.then((value) => dictionaryReferences = value),
|
||||
queryCodesQuery.then((value) => queryCodes = value),
|
||||
radicalsQuery.then((value) => radicals = value),
|
||||
partsQuery.then((value) => parts = value),
|
||||
readingsQuery.then((value) => readings = value),
|
||||
strokeMiscountsQuery.then((value) => strokeMiscounts = value),
|
||||
_codepointsQuery(connection, kanji).then((value) => codepoints = value),
|
||||
_kunyomisQuery(connection, kanji).then((value) => kunyomis = value),
|
||||
_onyomisQuery(connection, kanji).then((value) => onyomis = value),
|
||||
_meaningsQuery(connection, kanji).then((value) => meanings = value),
|
||||
_nanorisQuery(connection, kanji).then((value) => nanoris = value),
|
||||
_dictionaryReferencesQuery(
|
||||
connection,
|
||||
kanji,
|
||||
).then((value) => dictionaryReferences = value),
|
||||
_queryCodesQuery(connection, kanji).then((value) => queryCodes = value),
|
||||
_radicalsQuery(connection, kanji).then((value) => radicals = value),
|
||||
_partsQuery(connection, kanji).then((value) => parts = value),
|
||||
_readingsQuery(connection, kanji).then((value) => readings = value),
|
||||
_strokeMiscountsQuery(
|
||||
connection,
|
||||
kanji,
|
||||
).then((value) => strokeMiscounts = value),
|
||||
// variants_query.then((value) => variants = value),
|
||||
});
|
||||
|
||||
@@ -196,10 +242,7 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||
meanings: meanings.map((item) => item['meaning'] as String).toList(),
|
||||
kunyomi: kunyomis.map((item) => item['yomi'] as String).toList(),
|
||||
parts: parts.map((item) => item['radical'] as String).toList(),
|
||||
onyomi: onyomis
|
||||
.map((item) => item['yomi'] as String)
|
||||
.map(transliterateHiraganaToKatakana)
|
||||
.toList(),
|
||||
onyomi: onyomis.map((item) => item['yomi'] as String).toList(),
|
||||
radical: radical,
|
||||
codepoints: {
|
||||
for (final codepoint in codepoints)
|
||||
@@ -217,6 +260,7 @@ Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||
|
||||
// TODO: Use fewer queries with `IN` clauses to reduce the number of queries
|
||||
|
||||
/// Searches for multiple kanji at once, returning a map of kanji to their search results.
|
||||
Future<Map<String, KanjiSearchResult>> searchManyKanjiWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
Set<String> kanji,
|
||||
|
||||
@@ -3,10 +3,16 @@ import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
// TODO: validate that the list of radicals all are valid radicals
|
||||
|
||||
/// Returns a list of radicals that are part of any kanji that contains all of the input radicals.
|
||||
///
|
||||
/// This can be used to limit the choices of additional radicals provided to a user,
|
||||
/// so that any choice they make will still yield at least one kanji.
|
||||
Future<List<String>> searchRemainingRadicalsWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
List<String> radicals,
|
||||
) async {
|
||||
final distinctRadicals = radicals.toSet();
|
||||
|
||||
final queryResult = await connection.rawQuery(
|
||||
'''
|
||||
SELECT DISTINCT "radical"
|
||||
@@ -14,12 +20,12 @@ Future<List<String>> searchRemainingRadicalsWithDbConnection(
|
||||
WHERE "kanji" IN (
|
||||
SELECT "kanji"
|
||||
FROM "${RADKFILETableNames.radkfile}"
|
||||
WHERE "radical" IN (${List.filled(radicals.length, '?').join(',')})
|
||||
WHERE "radical" IN (${List.filled(distinctRadicals.length, '?').join(',')})
|
||||
GROUP BY "kanji"
|
||||
HAVING COUNT(DISTINCT "radical") = ?
|
||||
)
|
||||
''',
|
||||
[...radicals, radicals.length],
|
||||
[...distinctRadicals, distinctRadicals.length],
|
||||
);
|
||||
|
||||
final remainingRadicals = queryResult
|
||||
@@ -29,19 +35,22 @@ Future<List<String>> searchRemainingRadicalsWithDbConnection(
|
||||
return remainingRadicals;
|
||||
}
|
||||
|
||||
/// Returns a list of kanji that contain all of the input radicals.
|
||||
Future<List<String>> searchKanjiByRadicalsWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
List<String> radicals,
|
||||
) async {
|
||||
final distinctRadicals = radicals.toSet();
|
||||
|
||||
final queryResult = await connection.rawQuery(
|
||||
'''
|
||||
SELECT "kanji"
|
||||
FROM "${RADKFILETableNames.radkfile}"
|
||||
WHERE "radical" IN (${List.filled(radicals.length, '?').join(',')})
|
||||
WHERE "radical" IN (${List.filled(distinctRadicals.length, '?').join(',')})
|
||||
GROUP BY "kanji"
|
||||
HAVING COUNT(DISTINCT "radical") = ?
|
||||
''',
|
||||
[...radicals, radicals.length],
|
||||
[...distinctRadicals, distinctRadicals.length],
|
||||
);
|
||||
|
||||
final kanji = queryResult.map((row) => row['kanji'] as String).toList();
|
||||
|
||||
@@ -53,274 +53,363 @@ class LinearWordQueryData {
|
||||
});
|
||||
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.sense] whose
/// `entryId` is one of [entryIds], binding one `?` placeholder per id.
Future<List<Map<String, Object?>>> _sensesQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.sense,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.readingElement]
/// whose `entryId` is one of [entryIds], sorted by `orderNum`.
Future<List<Map<String, Object?>>> _readingelementsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingElement,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.kanjiElement]
/// whose `entryId` is one of [entryIds], sorted by `orderNum`.
Future<List<Map<String, Object?>>> _kanjielementsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.kanjiElement,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [TanosJLPTTableNames.jlptTag]
/// whose `entryId` is one of [entryIds].
Future<List<Map<String, Object?>>> _jlpttagsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    TanosJLPTTableNames.jlptTag,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
|
||||
|
||||
/// Selects the rows of `JMdict_EntryCommon` whose `entryId` is one of
/// [entryIds].
Future<List<Map<String, Object?>>> _commonentriesQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    'JMdict_EntryCommon',
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
|
||||
|
||||
// Sense queries
|
||||
|
||||
/// Fetches the antonym cross-references attached to the given [senseIds].
///
/// Rows come from the table named by [JMdictTableNames.senseAntonyms], joined
/// to `JMdict_BaseAndFurigana` on `xrefEntryId = entryId` and limited to the
/// joined rows whose `isFirst` column is truthy. One `?` placeholder is
/// generated per sense id.
///
/// Each row carries `senseId`, `ambiguous`, `xrefEntryId`, `base` and
/// `furigana`; results are ordered by `senseId`, then `xrefEntryId`.
Future<List<Map<String, Object?>>> _senseantonymsQuery(
|
||||
DatabaseExecutor connection,
|
||||
List<int> senseIds,
|
||||
) => connection.rawQuery(
|
||||
"""
|
||||
SELECT
|
||||
"${JMdictTableNames.senseAntonyms}".senseId,
|
||||
"${JMdictTableNames.senseAntonyms}".ambiguous,
|
||||
"${JMdictTableNames.senseAntonyms}".xrefEntryId,
|
||||
"JMdict_BaseAndFurigana"."base",
|
||||
"JMdict_BaseAndFurigana"."furigana"
|
||||
FROM "${JMdictTableNames.senseAntonyms}"
|
||||
JOIN "JMdict_BaseAndFurigana"
|
||||
ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
|
||||
WHERE
|
||||
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
|
||||
AND "JMdict_BaseAndFurigana"."isFirst"
|
||||
ORDER BY
|
||||
"${JMdictTableNames.senseAntonyms}"."senseId",
|
||||
"${JMdictTableNames.senseAntonyms}"."xrefEntryId"
|
||||
""",
|
||||
[...senseIds],
|
||||
);
|
||||
|
||||
/// Fetches the "see also" cross-references attached to the given [senseIds].
///
/// Rows come from the table named by [JMdictTableNames.senseSeeAlso], joined
/// to `JMdict_BaseAndFurigana` on `xrefEntryId = entryId` and limited to the
/// joined rows whose `isFirst` column is truthy. One `?` placeholder is
/// generated per sense id.
///
/// Each row carries `senseId`, `ambiguous`, `xrefEntryId`, `base` and
/// `furigana`; results are ordered by `senseId`, then `xrefEntryId`.
Future<List<Map<String, Object?>>> _senseseealsosQuery(
|
||||
DatabaseExecutor connection,
|
||||
List<int> senseIds,
|
||||
) => connection.rawQuery(
|
||||
"""
|
||||
SELECT
|
||||
"${JMdictTableNames.senseSeeAlso}"."senseId",
|
||||
"${JMdictTableNames.senseSeeAlso}"."ambiguous",
|
||||
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
|
||||
"JMdict_BaseAndFurigana"."base",
|
||||
"JMdict_BaseAndFurigana"."furigana"
|
||||
FROM "${JMdictTableNames.senseSeeAlso}"
|
||||
JOIN "JMdict_BaseAndFurigana"
|
||||
ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
|
||||
WHERE
|
||||
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
|
||||
AND "JMdict_BaseAndFurigana"."isFirst"
|
||||
ORDER BY
|
||||
"${JMdictTableNames.senseSeeAlso}"."senseId",
|
||||
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
|
||||
""",
|
||||
[...senseIds],
|
||||
);
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.senseDialect]
/// whose `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _sensedialectsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseDialect,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.senseField]
/// whose `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _sensefieldsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseField,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.senseGlossary]
/// whose `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _senseglossariesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseGlossary,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.senseInfo]
/// whose `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _senseinfosQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseInfo,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by
/// [JMdictTableNames.senseLanguageSource] whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _senselanguagesourcesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseLanguageSource,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.senseMisc]
/// whose `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _sensemiscsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseMisc,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.sensePOS]
/// whose `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _sensepossQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.sensePOS,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by
/// [JMdictTableNames.senseRestrictedToKanji] whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _senserestrictedtokanjisQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseRestrictedToKanji,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by
/// [JMdictTableNames.senseRestrictedToReading] whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _senserestrictedtoreadingsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseRestrictedToReading,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
/// Selects the rows of `JMdict_ExampleSentence` whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _examplesentencesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    'JMdict_ExampleSentence',
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||
|
||||
// Reading/kanji elements queries
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.readingInfo]
/// whose `elementId` is one of [readingIds].
Future<List<Map<String, Object?>>> _readingelementinfosQuery(
  DatabaseExecutor connection,
  List<int> readingIds,
) {
  final placeholders = List.filled(readingIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingInfo,
    where: '(elementId) IN ($placeholders)',
    whereArgs: readingIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by
/// [JMdictTableNames.readingRestriction] whose `elementId` is one of
/// [readingIds].
Future<List<Map<String, Object?>>> _readingelementrestrictionsQuery(
  DatabaseExecutor connection,
  List<int> readingIds,
) {
  final placeholders = List.filled(readingIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingRestriction,
    where: '(elementId) IN ($placeholders)',
    whereArgs: readingIds,
  );
}
|
||||
|
||||
/// Selects the rows of the table named by [JMdictTableNames.kanjiInfo]
/// whose `elementId` is one of [kanjiIds].
Future<List<Map<String, Object?>>> _kanjielementinfosQuery(
  DatabaseExecutor connection,
  List<int> kanjiIds,
) {
  final placeholders = List.filled(kanjiIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.kanjiInfo,
    where: '(elementId) IN ($placeholders)',
    whereArgs: kanjiIds,
  );
}
|
||||
|
||||
// Xref queries
|
||||
|
||||
/// Loads the linear word data for the entries referenced as antonyms.
///
/// Forwards [entryIds] to [fetchLinearWordQueryData] with `fetchXrefData`
/// set to `false`.
Future<LinearWordQueryData?> _senseantonymdataQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  return fetchLinearWordQueryData(
    connection,
    entryIds,
    fetchXrefData: false,
  );
}
|
||||
|
||||
/// Loads the linear word data for the entries referenced as "see also".
///
/// Forwards [entryIds] to [fetchLinearWordQueryData] with `fetchXrefData`
/// set to `false`.
Future<LinearWordQueryData?> _senseseealsodataQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  return fetchLinearWordQueryData(
    connection,
    entryIds,
    fetchXrefData: false,
  );
}
|
||||
|
||||
// Full query
|
||||
|
||||
Future<LinearWordQueryData> fetchLinearWordQueryData(
|
||||
DatabaseExecutor connection,
|
||||
List<int> entryIds, {
|
||||
bool fetchXrefData = true,
|
||||
}) async {
|
||||
late final List<Map<String, Object?>> senses;
|
||||
final Future<List<Map<String, Object?>>> sensesQuery = connection.query(
|
||||
JMdictTableNames.sense,
|
||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||
whereArgs: entryIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> readingElements;
|
||||
final Future<List<Map<String, Object?>>> readingelementsQuery = connection
|
||||
.query(
|
||||
JMdictTableNames.readingElement,
|
||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||
whereArgs: entryIds,
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> kanjiElements;
|
||||
final Future<List<Map<String, Object?>>> kanjielementsQuery = connection
|
||||
.query(
|
||||
JMdictTableNames.kanjiElement,
|
||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||
whereArgs: entryIds,
|
||||
orderBy: 'orderNum',
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> jlptTags;
|
||||
final Future<List<Map<String, Object?>>> jlpttagsQuery = connection.query(
|
||||
TanosJLPTTableNames.jlptTag,
|
||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||
whereArgs: entryIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> commonEntries;
|
||||
final Future<List<Map<String, Object?>>> commonentriesQuery = connection
|
||||
.query(
|
||||
'JMdict_EntryCommon',
|
||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
||||
whereArgs: entryIds,
|
||||
);
|
||||
|
||||
await Future.wait([
|
||||
sensesQuery.then((value) => senses = value),
|
||||
readingelementsQuery.then((value) => readingElements = value),
|
||||
kanjielementsQuery.then((value) => kanjiElements = value),
|
||||
jlpttagsQuery.then((value) => jlptTags = value),
|
||||
commonentriesQuery.then((value) => commonEntries = value),
|
||||
_sensesQuery(connection, entryIds).then((value) => senses = value),
|
||||
_readingelementsQuery(
|
||||
connection,
|
||||
entryIds,
|
||||
).then((value) => readingElements = value),
|
||||
_kanjielementsQuery(
|
||||
connection,
|
||||
entryIds,
|
||||
).then((value) => kanjiElements = value),
|
||||
_jlpttagsQuery(connection, entryIds).then((value) => jlptTags = value),
|
||||
_commonentriesQuery(
|
||||
connection,
|
||||
entryIds,
|
||||
).then((value) => commonEntries = value),
|
||||
]);
|
||||
|
||||
// Sense queries
|
||||
|
||||
final senseIds = senses.map((sense) => sense['senseId'] as int).toList();
|
||||
|
||||
late final List<Map<String, Object?>> senseAntonyms;
|
||||
final Future<List<Map<String, Object?>>> senseantonymsQuery = connection
|
||||
.rawQuery(
|
||||
"""
|
||||
SELECT
|
||||
"${JMdictTableNames.senseAntonyms}".senseId,
|
||||
"${JMdictTableNames.senseAntonyms}".ambiguous,
|
||||
"${JMdictTableNames.senseAntonyms}".xrefEntryId,
|
||||
"JMdict_BaseAndFurigana"."base",
|
||||
"JMdict_BaseAndFurigana"."furigana"
|
||||
FROM "${JMdictTableNames.senseAntonyms}"
|
||||
JOIN "JMdict_BaseAndFurigana"
|
||||
ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
|
||||
WHERE
|
||||
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
|
||||
AND "JMdict_BaseAndFurigana"."isFirst"
|
||||
ORDER BY
|
||||
"${JMdictTableNames.senseAntonyms}"."senseId",
|
||||
"${JMdictTableNames.senseAntonyms}"."xrefEntryId"
|
||||
""",
|
||||
[...senseIds],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseDialects;
|
||||
final Future<List<Map<String, Object?>>> sensedialectsQuery = connection
|
||||
.query(
|
||||
JMdictTableNames.senseDialect,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseFields;
|
||||
final Future<List<Map<String, Object?>>> sensefieldsQuery = connection.query(
|
||||
JMdictTableNames.senseField,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseGlossaries;
|
||||
final Future<List<Map<String, Object?>>> senseglossariesQuery = connection
|
||||
.query(
|
||||
JMdictTableNames.senseGlossary,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseInfos;
|
||||
final Future<List<Map<String, Object?>>> senseinfosQuery = connection.query(
|
||||
JMdictTableNames.senseInfo,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseLanguageSources;
|
||||
final Future<List<Map<String, Object?>>> senselanguagesourcesQuery =
|
||||
connection.query(
|
||||
JMdictTableNames.senseLanguageSource,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseMiscs;
|
||||
final Future<List<Map<String, Object?>>> sensemiscsQuery = connection.query(
|
||||
JMdictTableNames.senseMisc,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> sensePOSs;
|
||||
final Future<List<Map<String, Object?>>> sensepossQuery = connection.query(
|
||||
JMdictTableNames.sensePOS,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseRestrictedToKanjis;
|
||||
final Future<List<Map<String, Object?>>> senserestrictedtokanjisQuery =
|
||||
connection.query(
|
||||
JMdictTableNames.senseRestrictedToKanji,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseRestrictedToReadings;
|
||||
final Future<List<Map<String, Object?>>> senserestrictedtoreadingsQuery =
|
||||
connection.query(
|
||||
JMdictTableNames.senseRestrictedToReading,
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> senseSeeAlsos;
|
||||
final Future<List<Map<String, Object?>>> senseseealsosQuery = connection
|
||||
.rawQuery(
|
||||
"""
|
||||
SELECT
|
||||
"${JMdictTableNames.senseSeeAlso}"."senseId",
|
||||
"${JMdictTableNames.senseSeeAlso}"."ambiguous",
|
||||
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
|
||||
"JMdict_BaseAndFurigana"."base",
|
||||
"JMdict_BaseAndFurigana"."furigana"
|
||||
FROM "${JMdictTableNames.senseSeeAlso}"
|
||||
JOIN "JMdict_BaseAndFurigana"
|
||||
ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
|
||||
WHERE
|
||||
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
|
||||
AND "JMdict_BaseAndFurigana"."isFirst"
|
||||
ORDER BY
|
||||
"${JMdictTableNames.senseSeeAlso}"."senseId",
|
||||
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
|
||||
""",
|
||||
[...senseIds],
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> exampleSentences;
|
||||
final Future<List<Map<String, Object?>>> examplesentencesQuery = connection
|
||||
.query(
|
||||
'JMdict_ExampleSentence',
|
||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
||||
whereArgs: senseIds,
|
||||
);
|
||||
|
||||
// Reading queries
|
||||
|
||||
final readingIds = readingElements
|
||||
.map((element) => element['elementId'] as int)
|
||||
.toList();
|
||||
|
||||
late final List<Map<String, Object?>> readingElementInfos;
|
||||
final Future<List<Map<String, Object?>>> readingelementinfosQuery =
|
||||
connection.query(
|
||||
JMdictTableNames.readingInfo,
|
||||
where:
|
||||
'(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
|
||||
whereArgs: readingIds,
|
||||
);
|
||||
|
||||
late final List<Map<String, Object?>> readingElementRestrictions;
|
||||
final Future<List<Map<String, Object?>>> readingelementrestrictionsQuery =
|
||||
connection.query(
|
||||
JMdictTableNames.readingRestriction,
|
||||
where:
|
||||
'(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
|
||||
whereArgs: readingIds,
|
||||
);
|
||||
|
||||
// Kanji queries
|
||||
|
||||
final kanjiIds = kanjiElements
|
||||
.map((element) => element['elementId'] as int)
|
||||
.toList();
|
||||
|
||||
late final List<Map<String, Object?>> readingElementInfos;
|
||||
late final List<Map<String, Object?>> readingElementRestrictions;
|
||||
|
||||
late final List<Map<String, Object?>> kanjiElementInfos;
|
||||
final Future<List<Map<String, Object?>>> kanjielementinfosQuery = connection
|
||||
.query(
|
||||
JMdictTableNames.kanjiInfo,
|
||||
where:
|
||||
'(elementId) IN (${List.filled(kanjiIds.length, '?').join(',')})',
|
||||
whereArgs: kanjiIds,
|
||||
);
|
||||
|
||||
// Xref data queries
|
||||
await Future.wait([
|
||||
senseantonymsQuery.then((value) => senseAntonyms = value),
|
||||
senseseealsosQuery.then((value) => senseSeeAlsos = value),
|
||||
_senseantonymsQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseAntonyms = value),
|
||||
_senseseealsosQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseSeeAlsos = value),
|
||||
]);
|
||||
|
||||
late final LinearWordQueryData? senseAntonymData;
|
||||
final Future<LinearWordQueryData?> senseantonymdataQuery =
|
||||
fetchXrefData
|
||||
? fetchLinearWordQueryData(
|
||||
connection,
|
||||
senseAntonyms
|
||||
.map((antonym) => antonym['xrefEntryId'] as int)
|
||||
.toList(),
|
||||
fetchXrefData: false,
|
||||
)
|
||||
: Future.value(null);
|
||||
|
||||
late final LinearWordQueryData? senseSeeAlsoData;
|
||||
final Future<LinearWordQueryData?> senseseealsodataQuery =
|
||||
fetchXrefData
|
||||
? fetchLinearWordQueryData(
|
||||
connection,
|
||||
senseSeeAlsos.map((seeAlso) => seeAlso['xrefEntryId'] as int).toList(),
|
||||
fetchXrefData: false,
|
||||
)
|
||||
: Future.value(null);
|
||||
LinearWordQueryData? senseAntonymData;
|
||||
LinearWordQueryData? senseSeeAlsoData;
|
||||
|
||||
await Future.wait([
|
||||
sensedialectsQuery.then((value) => senseDialects = value),
|
||||
sensefieldsQuery.then((value) => senseFields = value),
|
||||
senseglossariesQuery.then((value) => senseGlossaries = value),
|
||||
senseinfosQuery.then((value) => senseInfos = value),
|
||||
senselanguagesourcesQuery.then((value) => senseLanguageSources = value),
|
||||
sensemiscsQuery.then((value) => senseMiscs = value),
|
||||
sensepossQuery.then((value) => sensePOSs = value),
|
||||
senserestrictedtokanjisQuery.then(
|
||||
(value) => senseRestrictedToKanjis = value,
|
||||
),
|
||||
senserestrictedtoreadingsQuery.then(
|
||||
(value) => senseRestrictedToReadings = value,
|
||||
),
|
||||
examplesentencesQuery.then((value) => exampleSentences = value),
|
||||
readingelementinfosQuery.then((value) => readingElementInfos = value),
|
||||
readingelementrestrictionsQuery.then(
|
||||
(value) => readingElementRestrictions = value,
|
||||
),
|
||||
kanjielementinfosQuery.then((value) => kanjiElementInfos = value),
|
||||
senseantonymdataQuery.then((value) => senseAntonymData = value),
|
||||
senseseealsodataQuery.then((value) => senseSeeAlsoData = value),
|
||||
_sensedialectsQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseDialects = value),
|
||||
_sensefieldsQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseFields = value),
|
||||
_senseglossariesQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseGlossaries = value),
|
||||
_senseinfosQuery(connection, senseIds).then((value) => senseInfos = value),
|
||||
_senselanguagesourcesQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseLanguageSources = value),
|
||||
_sensemiscsQuery(connection, senseIds).then((value) => senseMiscs = value),
|
||||
_sensepossQuery(connection, senseIds).then((value) => sensePOSs = value),
|
||||
_senserestrictedtokanjisQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseRestrictedToKanjis = value),
|
||||
_senserestrictedtoreadingsQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => senseRestrictedToReadings = value),
|
||||
_examplesentencesQuery(
|
||||
connection,
|
||||
senseIds,
|
||||
).then((value) => exampleSentences = value),
|
||||
_readingelementinfosQuery(
|
||||
connection,
|
||||
readingIds,
|
||||
).then((value) => readingElementInfos = value),
|
||||
_readingelementrestrictionsQuery(
|
||||
connection,
|
||||
readingIds,
|
||||
).then((value) => readingElementRestrictions = value),
|
||||
_kanjielementinfosQuery(
|
||||
connection,
|
||||
kanjiIds,
|
||||
).then((value) => kanjiElementInfos = value),
|
||||
|
||||
if (fetchXrefData)
|
||||
_senseantonymdataQuery(
|
||||
connection,
|
||||
senseAntonyms.map((antonym) => antonym['xrefEntryId'] as int).toList(),
|
||||
).then((value) => senseAntonymData = value),
|
||||
|
||||
if (fetchXrefData)
|
||||
_senseseealsodataQuery(
|
||||
connection,
|
||||
senseSeeAlsos.map((seeAlso) => seeAlso['xrefEntryId'] as int).toList(),
|
||||
).then((value) => senseSeeAlsoData = value),
|
||||
]);
|
||||
|
||||
return LinearWordQueryData(
|
||||
|
||||
@@ -15,15 +15,15 @@ SearchMode _determineSearchMode(String word) {
|
||||
final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word);
|
||||
|
||||
if (containsKanji && containsAscii) {
|
||||
return SearchMode.MixedKanji;
|
||||
return SearchMode.mixedKanji;
|
||||
} else if (containsKanji) {
|
||||
return SearchMode.Kanji;
|
||||
return SearchMode.kanji;
|
||||
} else if (containsAscii) {
|
||||
return SearchMode.English;
|
||||
return SearchMode.english;
|
||||
} else if (word.contains(hiraganaRegex) || word.contains(katakanaRegex)) {
|
||||
return SearchMode.Kana;
|
||||
return SearchMode.kana;
|
||||
} else {
|
||||
return SearchMode.MixedKana;
|
||||
return SearchMode.mixedKana;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -199,23 +199,23 @@ Future<List<ScoredEntryId>> _queryEnglish(
|
||||
SELECT
|
||||
"${JMdictTableNames.sense}"."entryId",
|
||||
MAX("JMdict_EntryScore"."score")
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?) * 20)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
|
||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
|
||||
as "score"
|
||||
FROM "${JMdictTableNames.senseGlossary}"
|
||||
JOIN "${JMdictTableNames.sense}" USING ("senseId")
|
||||
JOIN "JMdict_EntryScore" USING ("entryId")
|
||||
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
|
||||
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?2
|
||||
GROUP BY "JMdict_EntryScore"."entryId"
|
||||
ORDER BY
|
||||
"score" DESC,
|
||||
"${JMdictTableNames.sense}"."entryId" ASC
|
||||
LIMIT ?
|
||||
OFFSET ?
|
||||
${pageSize != null ? 'LIMIT ?3' : ''}
|
||||
${offset != null ? 'OFFSET ?4' : ''}
|
||||
'''
|
||||
.trim(),
|
||||
[word, word, word, '%${word.replaceAll('%', '')}%', pageSize, offset],
|
||||
[word, '%${word.replaceAll('%', '')}%', if (pageSize != null) pageSize, if (offset != null) offset],
|
||||
);
|
||||
|
||||
return result
|
||||
@@ -246,7 +246,7 @@ Future<List<ScoredEntryId>> fetchEntryIds(
|
||||
int? pageSize,
|
||||
int? offset,
|
||||
) async {
|
||||
if (searchMode == SearchMode.Auto) {
|
||||
if (searchMode == SearchMode.auto) {
|
||||
searchMode = _determineSearchMode(word);
|
||||
}
|
||||
|
||||
@@ -254,20 +254,20 @@ Future<List<ScoredEntryId>> fetchEntryIds(
|
||||
|
||||
late final List<ScoredEntryId> entryIds;
|
||||
switch (searchMode) {
|
||||
case SearchMode.Kanji:
|
||||
case SearchMode.kanji:
|
||||
entryIds = await _queryKanji(connection, word, pageSize, offset);
|
||||
break;
|
||||
|
||||
case SearchMode.Kana:
|
||||
case SearchMode.kana:
|
||||
entryIds = await _queryKana(connection, word, pageSize, offset);
|
||||
break;
|
||||
|
||||
case SearchMode.English:
|
||||
case SearchMode.english:
|
||||
entryIds = await _queryEnglish(connection, word, pageSize, offset);
|
||||
break;
|
||||
|
||||
case SearchMode.MixedKana:
|
||||
case SearchMode.MixedKanji:
|
||||
case SearchMode.mixedKana:
|
||||
case SearchMode.mixedKanji:
|
||||
default:
|
||||
throw UnimplementedError('Search mode $searchMode is not implemented');
|
||||
}
|
||||
@@ -280,7 +280,7 @@ Future<int?> fetchEntryIdCount(
|
||||
String word,
|
||||
SearchMode searchMode,
|
||||
) async {
|
||||
if (searchMode == SearchMode.Auto) {
|
||||
if (searchMode == SearchMode.auto) {
|
||||
searchMode = _determineSearchMode(word);
|
||||
}
|
||||
|
||||
@@ -289,20 +289,20 @@ Future<int?> fetchEntryIdCount(
|
||||
late final int? entryIdCount;
|
||||
|
||||
switch (searchMode) {
|
||||
case SearchMode.Kanji:
|
||||
case SearchMode.kanji:
|
||||
entryIdCount = await _queryKanjiCount(connection, word);
|
||||
break;
|
||||
|
||||
case SearchMode.Kana:
|
||||
case SearchMode.kana:
|
||||
entryIdCount = await _queryKanaCount(connection, word);
|
||||
break;
|
||||
|
||||
case SearchMode.English:
|
||||
case SearchMode.english:
|
||||
entryIdCount = await _queryEnglishCount(connection, word);
|
||||
break;
|
||||
|
||||
case SearchMode.MixedKana:
|
||||
case SearchMode.MixedKanji:
|
||||
case SearchMode.mixedKana:
|
||||
case SearchMode.mixedKanji:
|
||||
default:
|
||||
throw UnimplementedError('Search mode $searchMode is not implemented');
|
||||
}
|
||||
|
||||
@@ -21,50 +21,84 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
}) {
|
||||
final List<WordSearchResult> results = [];
|
||||
|
||||
final commonEntryIds = linearWordQueryData.commonEntries
|
||||
final Set<int> commonEntryIds = linearWordQueryData.commonEntries
|
||||
.map((entry) => entry['entryId'] as int)
|
||||
.toSet();
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryReadingElementsByEntryId =
|
||||
linearWordQueryData.readingElements.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryKanjiElementsByEntryId =
|
||||
linearWordQueryData.kanjiElements.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
final Map<int, int> elementIdToEntryId = {
|
||||
for (final element in linearWordQueryData.readingElements)
|
||||
element['elementId'] as int: element['entryId'] as int,
|
||||
for (final element in linearWordQueryData.kanjiElements)
|
||||
element['elementId'] as int: element['entryId'] as int,
|
||||
};
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryReadingElementInfosByEntryId =
|
||||
linearWordQueryData.readingElementInfos.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryKanjiElementInfosByEntryId =
|
||||
linearWordQueryData.kanjiElementInfos.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>>
|
||||
entryReadingElementRestrictionsByEntryId = linearWordQueryData
|
||||
.readingElementRestrictions
|
||||
.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, JlptLevel> entryJlptTagsByEntryId = linearWordQueryData
|
||||
.jlptTags
|
||||
.groupSetsBy((element) => element['entryId'] as int)
|
||||
.map(
|
||||
(final key, final value) => MapEntry(
|
||||
key,
|
||||
value.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)).min,
|
||||
),
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entrySensesByEntryId =
|
||||
linearWordQueryData.senses.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
for (final scoredEntryId in entryIds) {
|
||||
final List<Map<String, Object?>> entryReadingElements = linearWordQueryData
|
||||
.readingElements
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
final List<Map<String, Object?>> entryReadingElements =
|
||||
entryReadingElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryKanjiElements =
|
||||
entryKanjiElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryReadingElementInfos =
|
||||
entryReadingElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryKanjiElementInfos =
|
||||
entryKanjiElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryReadingElementRestrictions =
|
||||
entryReadingElementRestrictionsByEntryId[scoredEntryId.entryId] ??
|
||||
const [];
|
||||
|
||||
final List<Map<String, Object?>> entryKanjiElements = linearWordQueryData
|
||||
.kanjiElements
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryJlptTags = linearWordQueryData
|
||||
.jlptTags
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final jlptLevel =
|
||||
entryJlptTags
|
||||
.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
|
||||
.sorted((a, b) => b.compareTo(a))
|
||||
.firstOrNull ??
|
||||
JlptLevel.none;
|
||||
|
||||
final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||
|
||||
final List<Map<String, Object?>> entrySenses = linearWordQueryData.senses
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
|
||||
entryId: scoredEntryId.entryId,
|
||||
readingElements: entryReadingElements,
|
||||
final GroupedWordResult entryReadingElementsGrouped = _regroupWords(
|
||||
kanjiElements: entryKanjiElements,
|
||||
readingElementInfos: linearWordQueryData.readingElementInfos,
|
||||
readingElementRestrictions:
|
||||
linearWordQueryData.readingElementRestrictions,
|
||||
kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
|
||||
kanjiElementInfos: entryKanjiElementInfos,
|
||||
readingElements: entryReadingElements,
|
||||
readingElementInfos: entryReadingElementInfos,
|
||||
readingElementRestrictions: entryReadingElementRestrictions,
|
||||
);
|
||||
|
||||
final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
|
||||
final List<Map<String, Object?>> entrySenses =
|
||||
entrySensesByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
|
||||
final List<WordSearchSense> entrySensesGrouped = _regroupSenses(
|
||||
senses: entrySenses,
|
||||
senseAntonyms: linearWordQueryData.senseAntonyms,
|
||||
senseDialects: linearWordQueryData.senseDialects,
|
||||
@@ -82,6 +116,10 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
senseAntonymsXrefData: linearWordQueryData.senseAntonymData,
|
||||
);
|
||||
|
||||
final bool isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||
final JlptLevel jlptLevel =
|
||||
entryJlptTagsByEntryId[scoredEntryId.entryId] ?? JlptLevel.none;
|
||||
|
||||
results.add(
|
||||
WordSearchResult(
|
||||
score: scoredEntryId.score,
|
||||
@@ -112,8 +150,7 @@ class GroupedWordResult {
|
||||
});
|
||||
}
|
||||
|
||||
GroupedWordResult _regroup_words({
|
||||
required int entryId,
|
||||
GroupedWordResult _regroupWords({
|
||||
required List<Map<String, Object?>> kanjiElements,
|
||||
required List<Map<String, Object?>> kanjiElementInfos,
|
||||
required List<Map<String, Object?>> readingElements,
|
||||
@@ -122,36 +159,34 @@ GroupedWordResult _regroup_words({
|
||||
}) {
|
||||
final List<WordSearchRuby> rubys = [];
|
||||
|
||||
final kanjiElements_ = kanjiElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
final Map<int, Set<String>> readingElementRestrictionsSet =
|
||||
readingElementRestrictions
|
||||
.groupSetsBy((element) => element['elementId'] as int)
|
||||
.map(
|
||||
(key, value) => MapEntry(
|
||||
key,
|
||||
value.map((e) => e['restriction'] as String).toSet(),
|
||||
),
|
||||
);
|
||||
|
||||
final readingElements_ = readingElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final readingElementRestrictions_ = readingElementRestrictions
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
for (final readingElement in readingElements_) {
|
||||
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
|
||||
// Construct a cartesian product of kanji + readings, with exceptions made for items marked in `restrictions`.
|
||||
for (final readingElement in readingElements) {
|
||||
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements.isEmpty) {
|
||||
final ruby = WordSearchRuby(base: readingElement['reading'] as String);
|
||||
rubys.add(ruby);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
for (final kanjiElement in kanjiElements_) {
|
||||
for (final kanjiElement in kanjiElements) {
|
||||
final kanji = kanjiElement['reading'] as String;
|
||||
final reading = readingElement['reading'] as String;
|
||||
|
||||
final restrictions = readingElementRestrictions_
|
||||
.where((element) => element['reading'] == reading)
|
||||
.toList();
|
||||
|
||||
if (restrictions.isNotEmpty &&
|
||||
!restrictions.any((element) => element['restriction'] == kanji)) {
|
||||
// The 'restrictions' act as an allowlist, meaning that non-matching kanji elements should be ignored.
|
||||
final restrictions =
|
||||
readingElementRestrictionsSet[readingElement['elementId'] as int] ??
|
||||
{};
|
||||
if (restrictions.isNotEmpty && !restrictions.contains(kanji)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -160,42 +195,37 @@ GroupedWordResult _regroup_words({
|
||||
}
|
||||
}
|
||||
|
||||
assert(rubys.isNotEmpty, 'No readings found for entryId: $entryId');
|
||||
assert(
|
||||
rubys.isNotEmpty,
|
||||
'No readings found for entryId: ${kanjiElements.firstOrNull?['entryId'] ?? readingElements.firstOrNull?['entryId'] ?? '???'}',
|
||||
);
|
||||
|
||||
final Map<int, String> readingElementIdsToReading = {
|
||||
for (final element in readingElements_)
|
||||
for (final element in readingElements)
|
||||
element['elementId'] as int: element['reading'] as String,
|
||||
};
|
||||
|
||||
final Map<int, String> kanjiElementIdsToReading = {
|
||||
for (final element in kanjiElements_)
|
||||
for (final element in kanjiElements)
|
||||
element['elementId'] as int: element['reading'] as String,
|
||||
};
|
||||
|
||||
final readingElementInfos_ = readingElementInfos
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final kanjiElementInfos_ = kanjiElementInfos
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
return GroupedWordResult(
|
||||
rubys: rubys,
|
||||
readingInfos: {
|
||||
for (final rei in readingElementInfos_)
|
||||
for (final rei in readingElementInfos)
|
||||
readingElementIdsToReading[rei['elementId'] as int]!:
|
||||
JMdictReadingInfo.fromId(rei['info'] as String),
|
||||
},
|
||||
kanjiInfos: {
|
||||
for (final kei in kanjiElementInfos_)
|
||||
for (final kei in kanjiElementInfos)
|
||||
kanjiElementIdsToReading[kei['elementId'] as int]!:
|
||||
JMdictKanjiInfo.fromId(kei['info'] as String),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
List<WordSearchSense> _regroup_senses({
|
||||
List<WordSearchSense> _regroupSenses({
|
||||
required List<Map<String, Object?>> senses,
|
||||
required List<Map<String, Object?>> senseAntonyms,
|
||||
required List<Map<String, Object?>> senseDialects,
|
||||
|
||||
@@ -13,12 +13,31 @@ import 'package:jadb/search/word_search/regrouping.dart';
|
||||
import 'package:jadb/table_names/jmdict.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
enum SearchMode { Auto, English, Kanji, MixedKanji, Kana, MixedKana }
|
||||
/// Selects how a search string is interpreted when looking up words.
///
/// The order of the values is part of the enum's identity (`index`), so new
/// values should be appended rather than inserted.
enum SearchMode {
  /// Let the search pick one of the other modes by inspecting the input.
  auto,

  /// Treat the input as English text.
  english,

  /// Treat the input as the kanji writing of a word.
  kanji,

  /// Treat the input as a kanji writing that is mixed with kana/romaji.
  mixedKanji,

  /// Treat the input as the kana reading of a word.
  kana,

  /// Treat the input as a kana reading that is mixed with romaji.
  mixedKana,
}
|
||||
|
||||
/// Searches for an input string, returning a list of results with their details. Returns null if the input string is empty.
|
||||
Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
String word, {
|
||||
SearchMode searchMode = SearchMode.Auto,
|
||||
SearchMode searchMode = SearchMode.auto,
|
||||
int page = 0,
|
||||
int? pageSize,
|
||||
}) async {
|
||||
@@ -51,13 +70,18 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
||||
linearWordQueryData: linearWordQueryData,
|
||||
);
|
||||
|
||||
for (final resultEntry in result) {
|
||||
resultEntry.inferMatchSpans(word, searchMode: searchMode);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Searches for an input string, returning the amount of results that the search would yield without pagination.
|
||||
Future<int?> searchWordCountWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
String word, {
|
||||
SearchMode searchMode = SearchMode.Auto,
|
||||
SearchMode searchMode = SearchMode.auto,
|
||||
}) async {
|
||||
if (word.isEmpty) {
|
||||
return null;
|
||||
@@ -72,6 +96,7 @@ Future<int?> searchWordCountWithDbConnection(
|
||||
return entryIdCount;
|
||||
}
|
||||
|
||||
/// Fetches a single word by its entry ID, returning null if not found.
|
||||
Future<WordSearchResult?> getWordByIdWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
int id,
|
||||
@@ -107,6 +132,7 @@ Future<WordSearchResult?> getWordByIdWithDbConnection(
|
||||
return result.firstOrNull;
|
||||
}
|
||||
|
||||
/// Fetches multiple words by their entry IDs, returning a map from entry ID to result.
|
||||
Future<Map<int, WordSearchResult>> getWordsByIdsWithDbConnection(
|
||||
DatabaseExecutor connection,
|
||||
Set<int> ids,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
abstract class JMdictTableNames {
|
||||
static const String version = 'JMdict_Version';
|
||||
static const String entry = 'JMdict_Entry';
|
||||
static const String kanjiElement = 'JMdict_KanjiElement';
|
||||
static const String kanjiInfo = 'JMdict_KanjiElementInfo';
|
||||
@@ -20,6 +21,7 @@ abstract class JMdictTableNames {
|
||||
static const String senseSeeAlso = 'JMdict_SenseSeeAlso';
|
||||
|
||||
static Set<String> get allTables => {
|
||||
version,
|
||||
entry,
|
||||
kanjiElement,
|
||||
kanjiInfo,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
abstract class KANJIDICTableNames {
|
||||
static const String version = 'KANJIDIC_Version';
|
||||
static const String character = 'KANJIDIC_Character';
|
||||
static const String radicalName = 'KANJIDIC_RadicalName';
|
||||
static const String codepoint = 'KANJIDIC_Codepoint';
|
||||
@@ -17,6 +18,7 @@ abstract class KANJIDICTableNames {
|
||||
static const String nanori = 'KANJIDIC_Nanori';
|
||||
|
||||
static Set<String> get allTables => {
|
||||
version,
|
||||
character,
|
||||
radicalName,
|
||||
codepoint,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
abstract class RADKFILETableNames {
|
||||
static const String version = 'RADKFILE_Version';
|
||||
static const String radkfile = 'RADKFILE';
|
||||
|
||||
static Set<String> get allTables => {radkfile};
|
||||
static Set<String> get allTables => {version, radkfile};
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
abstract class TanosJLPTTableNames {
|
||||
static const String version = 'JMdict_JLPT_Version';
|
||||
static const String jlptTag = 'JMdict_JLPTTag';
|
||||
|
||||
static Set<String> get allTables => {jlptTag};
|
||||
static Set<String> get allTables => {version, jlptTag};
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules.dart';
|
||||
|
||||
enum WordClass {
|
||||
@@ -10,6 +11,8 @@ enum WordClass {
|
||||
adverb,
|
||||
particle,
|
||||
input,
|
||||
|
||||
// TODO: add toString and fromString so it can be parsed by the cli
|
||||
}
|
||||
|
||||
enum LemmatizationRuleType { prefix, suffix }
|
||||
@@ -18,7 +21,7 @@ class LemmatizationRule {
|
||||
final String name;
|
||||
final AllomorphPattern pattern;
|
||||
final WordClass wordClass;
|
||||
final List<WordClass>? validChildClasses;
|
||||
final Set<WordClass>? validChildClasses;
|
||||
final bool terminal;
|
||||
|
||||
const LemmatizationRule({
|
||||
@@ -38,9 +41,9 @@ class LemmatizationRule {
|
||||
required String pattern,
|
||||
required String? replacement,
|
||||
required WordClass wordClass,
|
||||
validChildClasses,
|
||||
terminal = false,
|
||||
lookAheadBehind = const [''],
|
||||
Set<WordClass>? validChildClasses,
|
||||
bool terminal = false,
|
||||
List<Pattern> lookAheadBehind = const [''],
|
||||
LemmatizationRuleType type = LemmatizationRuleType.suffix,
|
||||
}) : this(
|
||||
name: name,
|
||||
@@ -55,6 +58,27 @@ class LemmatizationRule {
|
||||
terminal: terminal,
|
||||
wordClass: wordClass,
|
||||
);
|
||||
|
||||
/// Hash code derived from the same fields that [operator ==] compares.
///
/// [validChildClasses] is hashed only through [SetEquality], so that two
/// rules holding distinct-but-equal sets produce the same hash. Feeding the
/// raw set into [Object.hash] as well would mix in its identity hash and make
/// equal rules hash differently.
@override
int get hashCode => Object.hash(
  name,
  pattern,
  wordClass,
  terminal,
  // Content-based hash; `SetEquality.hash` also accepts `null`.
  SetEquality().hash(validChildClasses),
);
|
||||
|
||||
/// Two rules are equal when their name, pattern, word class and terminal
/// flag match and their [validChildClasses] contain the same elements
/// (compared as sets, `null` only equal to `null`).
@override
bool operator ==(Object other) {
  // Same instance: nothing else to compare.
  if (identical(this, other)) {
    return true;
  }

  if (other is! LemmatizationRule) {
    return false;
  }

  final bool plainFieldsMatch =
      other.name == name &&
      other.pattern == pattern &&
      other.wordClass == wordClass &&
      other.terminal == terminal;
  if (!plainFieldsMatch) {
    return false;
  }

  return SetEquality().equals(validChildClasses, other.validChildClasses);
}
|
||||
}
|
||||
|
||||
/// Represents a set of patterns for matching allomorphs in a word.
|
||||
@@ -71,6 +95,7 @@ class AllomorphPattern {
|
||||
this.lookAheadBehind = const [''],
|
||||
});
|
||||
|
||||
/// Convert the [patterns] into regexes
|
||||
List<(String, Pattern)> get allPatternCombinations {
|
||||
final combinations = <(String, Pattern)>[];
|
||||
for (final l in lookAheadBehind) {
|
||||
@@ -94,6 +119,7 @@ class AllomorphPattern {
|
||||
return combinations;
|
||||
}
|
||||
|
||||
/// Check whether an input string matches any of the [patterns]
|
||||
bool matches(String word) {
|
||||
for (final (_, p) in allPatternCombinations) {
|
||||
if (p is String) {
|
||||
@@ -111,6 +137,9 @@ class AllomorphPattern {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Apply the replacement for this pattern.
|
||||
///
|
||||
/// If none of the [patterns] apply, this function returns `null`.
|
||||
List<String>? apply(String word) {
|
||||
for (final (affix, p) in allPatternCombinations) {
|
||||
switch ((type, p is RegExp)) {
|
||||
@@ -157,6 +186,22 @@ class AllomorphPattern {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/// Content-based hash code, consistent with [operator ==].
///
/// [patterns] maps strings to lists, so it is hashed with
/// [DeepCollectionEquality]; a plain [MapEquality] would hash the list values
/// by identity.
@override
int get hashCode => Object.hash(
  type,
  ListEquality().hash(lookAheadBehind),
  const DeepCollectionEquality().hash(patterns),
);

/// Two patterns are equal when they have the same [type], the same
/// [lookAheadBehind] entries in the same order, and the same [patterns]
/// content (same keys, each mapped to an element-wise equal list).
@override
bool operator ==(Object other) {
  if (identical(this, other)) return true;
  return other is AllomorphPattern &&
      other.type == type &&
      ListEquality().equals(other.lookAheadBehind, lookAheadBehind) &&
      // Deep comparison: the map's values are lists, which only compare by
      // identity under the default equality used by `MapEquality()`.
      const DeepCollectionEquality().equals(other.patterns, patterns);
}
|
||||
}
|
||||
|
||||
class Lemmatized {
|
||||
@@ -203,9 +248,10 @@ List<Lemmatized> _lemmatize(LemmatizationRule parentRule, String word) {
|
||||
|
||||
final filteredLemmatizationRules = parentRule.validChildClasses == null
|
||||
? lemmatizationRules
|
||||
: lemmatizationRules.where(
|
||||
(r) => parentRule.validChildClasses!.contains(r.wordClass),
|
||||
);
|
||||
: [
|
||||
for (final wordClass in parentRule.validChildClasses!)
|
||||
...lemmatizationRulesByWordClass[wordClass]!,
|
||||
];
|
||||
|
||||
for (final rule in filteredLemmatizationRules) {
|
||||
if (rule.matches(word)) {
|
||||
|
||||
@@ -1,10 +1,17 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/godan-verbs.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/i-adjectives.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/ichidan-verbs.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
|
||||
|
||||
List<LemmatizationRule> lemmatizationRules = [
|
||||
final List<LemmatizationRule> lemmatizationRules = List.unmodifiable([
|
||||
...ichidanVerbLemmatizationRules,
|
||||
...godanVerbLemmatizationRules,
|
||||
...iAdjectiveLemmatizationRules,
|
||||
];
|
||||
]);
|
||||
|
||||
/// Lemmatization rules indexed by the [WordClass] they belong to.
///
/// Only ichidan verbs, i-adjectives and godan verbs have an entry; looking up
/// any other [WordClass] yields `null`. The map itself cannot be modified.
final Map<WordClass, List<LemmatizationRule>> lemmatizationRulesByWordClass =
    Map<WordClass, List<LemmatizationRule>>.unmodifiable(
      <WordClass, List<LemmatizationRule>>{
        WordClass.ichidanVerb: ichidanVerbLemmatizationRules,
        WordClass.iAdjective: iAdjectiveLemmatizationRules,
        WordClass.godanVerb: godanVerbLemmatizationRules,
      },
    );
|
||||
|
||||
@@ -1,457 +0,0 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
|
||||
List<LemmatizationRule> godanVerbLemmatizationRules = [
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - base form',
|
||||
terminal: true,
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'う': ['う'],
|
||||
'く': ['く'],
|
||||
'ぐ': ['ぐ'],
|
||||
'す': ['す'],
|
||||
'つ': ['つ'],
|
||||
'ぬ': ['ぬ'],
|
||||
'ぶ': ['ぶ'],
|
||||
'む': ['む'],
|
||||
'る': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わない': ['う'],
|
||||
'かない': ['く'],
|
||||
'がない': ['ぐ'],
|
||||
'さない': ['す'],
|
||||
'たない': ['つ'],
|
||||
'なない': ['ぬ'],
|
||||
'ばない': ['ぶ'],
|
||||
'まない': ['む'],
|
||||
'らない': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - past form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'した': ['す'],
|
||||
'った': ['る', 'つ', 'う'],
|
||||
'んだ': ['む', 'ぬ', 'ぶ'],
|
||||
'いだ': ['ぐ'],
|
||||
'いた': ['く'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - te-form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いて': ['く', 'ぐ'],
|
||||
'して': ['す'],
|
||||
'って': ['る', 'つ', 'う'],
|
||||
'んで': ['む', 'ぬ', 'ぶ'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - te-form with いる',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いている': ['く', 'ぐ'],
|
||||
'している': ['す'],
|
||||
'っている': ['る', 'つ', 'う'],
|
||||
'んでいる': ['む', 'ぬ', 'ぶ'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - te-form with いた',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いていた': ['く', 'ぐ'],
|
||||
'していた': ['す'],
|
||||
'っていた': ['る', 'つ', 'う'],
|
||||
'んでいた': ['む', 'ぬ', 'ぶ'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - conditional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'けば': ['く'],
|
||||
'げば': ['ぐ'],
|
||||
'せば': ['す'],
|
||||
'てば': ['つ', 'る', 'う'],
|
||||
'ねば': ['ぬ'],
|
||||
'べば': ['ぶ'],
|
||||
'めば': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - volitional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'おう': ['う'],
|
||||
'こう': ['く'],
|
||||
'ごう': ['ぐ'],
|
||||
'そう': ['す'],
|
||||
'とう': ['つ', 'る', 'う'],
|
||||
'のう': ['ぬ'],
|
||||
'ぼう': ['ぶ'],
|
||||
'もう': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - potential form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'ける': ['く'],
|
||||
'げる': ['ぐ'],
|
||||
'せる': ['す'],
|
||||
'てる': ['つ', 'る', 'う'],
|
||||
'ねる': ['ぬ'],
|
||||
'べる': ['ぶ'],
|
||||
'める': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かれる': ['く'],
|
||||
'がれる': ['ぐ'],
|
||||
'される': ['す'],
|
||||
'たれる': ['つ', 'る', 'う'],
|
||||
'なれる': ['ぬ'],
|
||||
'ばれる': ['ぶ'],
|
||||
'まれる': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - causative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かせる': ['く'],
|
||||
'がせる': ['ぐ'],
|
||||
'させる': ['す'],
|
||||
'たせる': ['つ', 'る', 'う'],
|
||||
'なせる': ['ぬ'],
|
||||
'ばせる': ['ぶ'],
|
||||
'ませる': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - causative-passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かされる': ['く'],
|
||||
'がされる': ['ぐ'],
|
||||
'される': ['す'],
|
||||
'たされる': ['つ', 'る', 'う'],
|
||||
'なされる': ['ぬ'],
|
||||
'ばされる': ['ぶ'],
|
||||
'まされる': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - imperative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'え': ['う'],
|
||||
'け': ['く'],
|
||||
'げ': ['ぐ'],
|
||||
'せ': ['す'],
|
||||
'て': ['つ', 'る', 'う'],
|
||||
'ね': ['ぬ'],
|
||||
'べ': ['ぶ'],
|
||||
'め': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative past form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わなかった': ['う'],
|
||||
'かなかった': ['く'],
|
||||
'がなかった': ['ぐ'],
|
||||
'さなかった': ['す'],
|
||||
'たなかった': ['つ'],
|
||||
'ななかった': ['ぬ'],
|
||||
'ばなかった': ['ぶ'],
|
||||
'まなかった': ['む'],
|
||||
'らなかった': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative te-form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わなくて': ['う'],
|
||||
'かなくて': ['く'],
|
||||
'がなくて': ['ぐ'],
|
||||
'さなくて': ['す'],
|
||||
'たなくて': ['つ'],
|
||||
'ななくて': ['ぬ'],
|
||||
'ばなくて': ['ぶ'],
|
||||
'まなくて': ['む'],
|
||||
'らなくて': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative conditional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'わなければ': ['う'],
|
||||
'かなければ': ['く'],
|
||||
'がなければ': ['ぐ'],
|
||||
'さなければ': ['す'],
|
||||
'たなければ': ['つ'],
|
||||
'ななければ': ['ぬ'],
|
||||
'ばなければ': ['ぶ'],
|
||||
'まなければ': ['む'],
|
||||
'らなければ': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative volitional form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'うまい': ['う'],
|
||||
'くまい': ['く'],
|
||||
'ぐまい': ['ぐ'],
|
||||
'すまい': ['す'],
|
||||
'つまい': ['つ', 'る', 'う'],
|
||||
'ぬまい': ['ぬ'],
|
||||
'ぶまい': ['ぶ'],
|
||||
'むまい': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative potential form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'けない': ['く'],
|
||||
'げない': ['ぐ'],
|
||||
'せない': ['す'],
|
||||
'てない': ['つ', 'る', 'う'],
|
||||
'ねない': ['ぬ'],
|
||||
'べない': ['ぶ'],
|
||||
'めない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かれない': ['く'],
|
||||
'がれない': ['ぐ'],
|
||||
'されない': ['す'],
|
||||
'たれない': ['つ', 'る', 'う'],
|
||||
'なれない': ['ぬ'],
|
||||
'ばれない': ['ぶ'],
|
||||
'まれない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative causative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かせない': ['く'],
|
||||
'がせない': ['ぐ'],
|
||||
'させない': ['す'],
|
||||
'たせない': ['つ', 'る', 'う'],
|
||||
'なせない': ['ぬ'],
|
||||
'ばせない': ['ぶ'],
|
||||
'ませない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative causative-passive form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'かされない': ['く'],
|
||||
'がされない': ['ぐ'],
|
||||
'されない': ['す'],
|
||||
'たされない': ['つ', 'る', 'う'],
|
||||
'なされない': ['ぬ'],
|
||||
'ばされない': ['ぶ'],
|
||||
'まされない': ['む'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative imperative form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'うな': ['う'],
|
||||
'くな': ['く'],
|
||||
'ぐな': ['ぐ'],
|
||||
'すな': ['す'],
|
||||
'つな': ['つ'],
|
||||
'ぬな': ['ぬ'],
|
||||
'ぶな': ['ぶ'],
|
||||
'むな': ['む'],
|
||||
'るな': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'きたい': ['く'],
|
||||
'ぎたい': ['ぐ'],
|
||||
'したい': ['す'],
|
||||
'ちたい': ['つ'],
|
||||
'にたい': ['ぬ'],
|
||||
'びたい': ['ぶ'],
|
||||
'みたい': ['む'],
|
||||
'りたい': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いたくない': ['う'],
|
||||
'きたくない': ['く'],
|
||||
'ぎたくない': ['ぐ'],
|
||||
'したくない': ['す'],
|
||||
'ちたくない': ['つ'],
|
||||
'にたくない': ['ぬ'],
|
||||
'びたくない': ['ぶ'],
|
||||
'みたくない': ['む'],
|
||||
'りたくない': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - past desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'きたかった': ['く'],
|
||||
'ぎたかった': ['ぐ'],
|
||||
'したかった': ['す'],
|
||||
'ちたかった': ['つ'],
|
||||
'にたかった': ['ぬ'],
|
||||
'びたかった': ['ぶ'],
|
||||
'みたかった': ['む'],
|
||||
'りたかった': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
LemmatizationRule(
|
||||
name: 'Godan verb - negative past desire form',
|
||||
pattern: AllomorphPattern(
|
||||
patterns: {
|
||||
'いたくなかった': ['う'],
|
||||
'きたくなかった': ['く'],
|
||||
'ぎたくなかった': ['ぐ'],
|
||||
'したくなかった': ['す'],
|
||||
'ちたくなかった': ['つ'],
|
||||
'にたくなかった': ['ぬ'],
|
||||
'びたくなかった': ['ぶ'],
|
||||
'みたくなかった': ['む'],
|
||||
'りたくなかった': ['る'],
|
||||
},
|
||||
type: LemmatizationRuleType.suffix,
|
||||
),
|
||||
validChildClasses: [WordClass.godanVerb],
|
||||
wordClass: WordClass.godanVerb,
|
||||
),
|
||||
];
|
||||
509
lib/util/lemmatizer/rules/godan_verbs.dart
Normal file
509
lib/util/lemmatizer/rules/godan_verbs.dart
Normal file
@@ -0,0 +1,509 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
|
||||
/// Terminal rule for godan verbs already in dictionary form: every ending
/// maps onto itself.
final LemmatizationRule godanVerbBase = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  terminal: true,
  name: 'Godan verb - base form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'う': ['う'],
      'く': ['く'],
      'ぐ': ['ぐ'],
      'す': ['す'],
      'つ': ['つ'],
      'ぬ': ['ぬ'],
      'ぶ': ['ぶ'],
      'む': ['む'],
      'る': ['る'],
    },
  ),
);
|
||||
|
||||
/// Plain negative (`-ない`) endings of godan verbs, each keyed to the
/// dictionary ending it is listed against.
final LemmatizationRule godanVerbNegative = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - negative form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わない': ['う'],
      'かない': ['く'],
      'がない': ['ぐ'],
      'さない': ['す'],
      'たない': ['つ'],
      'なない': ['ぬ'],
      'ばない': ['ぶ'],
      'まない': ['む'],
      'らない': ['る'],
    },
  ),
);
|
||||
|
||||
/// Plain past endings of godan verbs. `った` and `んだ` are ambiguous, so
/// they list every dictionary ending they may stand for.
final LemmatizationRule godanVerbPast = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - past form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'した': ['す'],
      'った': ['る', 'つ', 'う'],
      'んだ': ['む', 'ぬ', 'ぶ'],
      'いだ': ['ぐ'],
      'いた': ['く'],
    },
  ),
);
|
||||
|
||||
/// Te-form endings of godan verbs, keyed to the dictionary ending(s) they
/// may derive from. Ambiguous endings (`って`, `んで`) list every candidate.
final LemmatizationRule godanVerbTe = LemmatizationRule(
  name: 'Godan verb - te-form',
  pattern: AllomorphPattern(
    patterns: {
      'いて': ['く'],
      // ぐ-verbs voice the て (泳ぐ → 泳いで); they never surface as いて,
      // mirroring the いだ/いた split in the past-form rule.
      'いで': ['ぐ'],
      'して': ['す'],
      'って': ['る', 'つ', 'う'],
      'んで': ['む', 'ぬ', 'ぶ'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Te-form + いる endings of godan verbs, keyed to the dictionary ending(s)
/// they may derive from.
final LemmatizationRule godanVerbTeiru = LemmatizationRule(
  name: 'Godan verb - te-form with いる',
  pattern: AllomorphPattern(
    patterns: {
      'いている': ['く'],
      // ぐ-verbs take the voiced で (泳ぐ → 泳いでいる).
      'いでいる': ['ぐ'],
      'している': ['す'],
      'っている': ['る', 'つ', 'う'],
      'んでいる': ['む', 'ぬ', 'ぶ'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Te-form + いた endings of godan verbs, keyed to the dictionary ending(s)
/// they may derive from.
final LemmatizationRule godanVerbTeita = LemmatizationRule(
  name: 'Godan verb - te-form with いた',
  pattern: AllomorphPattern(
    patterns: {
      'いていた': ['く'],
      // ぐ-verbs take the voiced で (泳ぐ → 泳いでいた).
      'いでいた': ['ぐ'],
      'していた': ['す'],
      'っていた': ['る', 'つ', 'う'],
      'んでいた': ['む', 'ぬ', 'ぶ'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Conditional (`-ば`) endings of godan verbs. Each e-row ending belongs to
/// exactly one dictionary ending, so no entry is ambiguous.
final LemmatizationRule godanVerbConditional = LemmatizationRule(
  name: 'Godan verb - conditional form',
  pattern: AllomorphPattern(
    patterns: {
      'えば': ['う'], // 買う → 買えば
      'けば': ['く'],
      'げば': ['ぐ'],
      'せば': ['す'],
      'てば': ['つ'], // only つ-verbs yield てば
      'ねば': ['ぬ'],
      'べば': ['ぶ'],
      'めば': ['む'],
      'れば': ['る'], // 走る → 走れば
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Volitional (`-おう`) endings of godan verbs. Each o-row ending belongs to
/// exactly one dictionary ending.
final LemmatizationRule godanVerbVolitional = LemmatizationRule(
  name: 'Godan verb - volitional form',
  pattern: AllomorphPattern(
    patterns: {
      'おう': ['う'],
      'こう': ['く'],
      'ごう': ['ぐ'],
      'そう': ['す'],
      'とう': ['つ'], // only つ-verbs yield とう
      'のう': ['ぬ'],
      'ぼう': ['ぶ'],
      'もう': ['む'],
      'ろう': ['る'], // 走る → 走ろう
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Potential endings (e-row + る) of godan verbs. Each ending belongs to
/// exactly one dictionary ending.
final LemmatizationRule godanVerbPotential = LemmatizationRule(
  name: 'Godan verb - potential form',
  pattern: AllomorphPattern(
    patterns: {
      'える': ['う'], // 買う → 買える
      'ける': ['く'],
      'げる': ['ぐ'],
      'せる': ['す'],
      'てる': ['つ'], // only つ-verbs yield てる
      'ねる': ['ぬ'],
      'べる': ['ぶ'],
      'める': ['む'],
      'れる': ['る'], // 走る → 走れる
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Passive endings (a-row + れる) of godan verbs. Each ending belongs to
/// exactly one dictionary ending.
final LemmatizationRule godanVerbPassive = LemmatizationRule(
  name: 'Godan verb - passive form',
  pattern: AllomorphPattern(
    patterns: {
      'われる': ['う'], // 買う → 買われる
      'かれる': ['く'],
      'がれる': ['ぐ'],
      'される': ['す'],
      'たれる': ['つ'], // only つ-verbs yield たれる
      'なれる': ['ぬ'],
      'ばれる': ['ぶ'],
      'まれる': ['む'],
      'られる': ['る'], // 走る → 走られる
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Causative endings (a-row + せる) of godan verbs. Each ending belongs to
/// exactly one dictionary ending.
final LemmatizationRule godanVerbCausative = LemmatizationRule(
  name: 'Godan verb - causative form',
  pattern: AllomorphPattern(
    patterns: {
      'わせる': ['う'], // 買う → 買わせる
      'かせる': ['く'],
      'がせる': ['ぐ'],
      'させる': ['す'],
      'たせる': ['つ'], // only つ-verbs yield たせる
      'なせる': ['ぬ'],
      'ばせる': ['ぶ'],
      'ませる': ['む'],
      'らせる': ['る'], // 走る → 走らせる
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Causative-passive endings of godan verbs, using the contracted
/// `-される` form (書かされる) for every row except す.
final LemmatizationRule godanVerbCausativePassive = LemmatizationRule(
  name: 'Godan verb - causative-passive form',
  pattern: AllomorphPattern(
    patterns: {
      'わされる': ['う'], // 買う → 買わされる
      'かされる': ['く'],
      'がされる': ['ぐ'],
      // す-verbs have no contracted form; a bare される is their plain
      // passive, so the full させられる is matched here instead.
      'させられる': ['す'],
      'たされる': ['つ'], // only つ-verbs yield たされる
      'なされる': ['ぬ'],
      'ばされる': ['ぶ'],
      'まされる': ['む'],
      'らされる': ['る'], // 走る → 走らされる
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Imperative endings (bare e-row kana) of godan verbs. Each ending belongs
/// to exactly one dictionary ending.
final LemmatizationRule godanVerbImperative = LemmatizationRule(
  name: 'Godan verb - imperative form',
  pattern: AllomorphPattern(
    patterns: {
      'え': ['う'],
      'け': ['く'],
      'げ': ['ぐ'],
      'せ': ['す'],
      'て': ['つ'], // only つ-verbs yield て (待つ → 待て)
      'ね': ['ぬ'],
      'べ': ['ぶ'],
      'め': ['む'],
      'れ': ['る'], // 走る → 走れ
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative past (`-なかった`) endings of godan verbs, one entry per
/// dictionary ending.
final LemmatizationRule godanVerbNegativePast = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - negative past form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わなかった': ['う'],
      'かなかった': ['く'],
      'がなかった': ['ぐ'],
      'さなかった': ['す'],
      'たなかった': ['つ'],
      'ななかった': ['ぬ'],
      'ばなかった': ['ぶ'],
      'まなかった': ['む'],
      'らなかった': ['る'],
    },
  ),
);
|
||||
|
||||
/// Negative te-form (`-なくて`) endings of godan verbs, one entry per
/// dictionary ending.
final LemmatizationRule godanVerbNegativeTe = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - negative te-form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わなくて': ['う'],
      'かなくて': ['く'],
      'がなくて': ['ぐ'],
      'さなくて': ['す'],
      'たなくて': ['つ'],
      'ななくて': ['ぬ'],
      'ばなくて': ['ぶ'],
      'まなくて': ['む'],
      'らなくて': ['る'],
    },
  ),
);
|
||||
|
||||
/// Negative conditional (`-なければ`) endings of godan verbs, one entry per
/// dictionary ending.
final LemmatizationRule godanVerbNegativeConditional = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - negative conditional form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わなければ': ['う'],
      'かなければ': ['く'],
      'がなければ': ['ぐ'],
      'さなければ': ['す'],
      'たなければ': ['つ'],
      'ななければ': ['ぬ'],
      'ばなければ': ['ぶ'],
      'まなければ': ['む'],
      'らなければ': ['る'],
    },
  ),
);
|
||||
|
||||
/// Negative volitional endings of godan verbs: the dictionary form followed
/// by `まい`, so every ending maps back to the kana it starts with.
final LemmatizationRule godanVerbNegativeVolitional = LemmatizationRule(
  name: 'Godan verb - negative volitional form',
  pattern: AllomorphPattern(
    patterns: {
      'うまい': ['う'],
      'くまい': ['く'],
      'ぐまい': ['ぐ'],
      'すまい': ['す'],
      'つまい': ['つ'], // only つ-verbs yield つまい
      'ぬまい': ['ぬ'],
      'ぶまい': ['ぶ'],
      'むまい': ['む'],
      'るまい': ['る'], // 走る → 走るまい
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative potential endings (e-row + ない) of godan verbs. Each ending
/// belongs to exactly one dictionary ending.
final LemmatizationRule godanVerbNegativePotential = LemmatizationRule(
  name: 'Godan verb - negative potential form',
  pattern: AllomorphPattern(
    patterns: {
      'えない': ['う'], // 買う → 買えない
      'けない': ['く'],
      'げない': ['ぐ'],
      'せない': ['す'],
      'てない': ['つ'], // only つ-verbs yield てない
      'ねない': ['ぬ'],
      'べない': ['ぶ'],
      'めない': ['む'],
      'れない': ['る'], // 走る → 走れない
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative passive endings (a-row + れない) of godan verbs. Each ending
/// belongs to exactly one dictionary ending.
final LemmatizationRule godanVerbNegativePassive = LemmatizationRule(
  name: 'Godan verb - negative passive form',
  pattern: AllomorphPattern(
    patterns: {
      'われない': ['う'], // 買う → 買われない
      'かれない': ['く'],
      'がれない': ['ぐ'],
      'されない': ['す'],
      'たれない': ['つ'], // only つ-verbs yield たれない
      'なれない': ['ぬ'],
      'ばれない': ['ぶ'],
      'まれない': ['む'],
      'られない': ['る'], // 走る → 走られない
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative causative endings (a-row + せない) of godan verbs. Each ending
/// belongs to exactly one dictionary ending.
final LemmatizationRule godanVerbNegativeCausative = LemmatizationRule(
  name: 'Godan verb - negative causative form',
  pattern: AllomorphPattern(
    patterns: {
      'わせない': ['う'], // 買う → 買わせない
      'かせない': ['く'],
      'がせない': ['ぐ'],
      'させない': ['す'],
      'たせない': ['つ'], // only つ-verbs yield たせない
      'なせない': ['ぬ'],
      'ばせない': ['ぶ'],
      'ませない': ['む'],
      'らせない': ['る'], // 走る → 走らせない
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative causative-passive endings of godan verbs, using the contracted
/// `-されない` form for every row except す.
final LemmatizationRule godanVerbNegativeCausativePassive = LemmatizationRule(
  name: 'Godan verb - negative causative-passive form',
  pattern: AllomorphPattern(
    patterns: {
      'わされない': ['う'], // 買う → 買わされない
      'かされない': ['く'],
      'がされない': ['ぐ'],
      // す-verbs have no contracted form; a bare されない is their plain
      // negative passive, so the full させられない is matched here instead.
      'させられない': ['す'],
      'たされない': ['つ'], // only つ-verbs yield たされない
      'なされない': ['ぬ'],
      'ばされない': ['ぶ'],
      'まされない': ['む'],
      'らされない': ['る'], // 走る → 走らされない
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative imperative endings of godan verbs: the dictionary ending
/// followed by `な`.
final LemmatizationRule godanVerbNegativeImperative = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - negative imperative form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'うな': ['う'],
      'くな': ['く'],
      'ぐな': ['ぐ'],
      'すな': ['す'],
      'つな': ['つ'],
      'ぬな': ['ぬ'],
      'ぶな': ['ぶ'],
      'むな': ['む'],
      'るな': ['る'],
    },
  ),
);
|
||||
|
||||
/// Desire (`-たい`) endings of godan verbs: i-row kana + たい, one entry per
/// dictionary ending.
final LemmatizationRule godanVerbDesire = LemmatizationRule(
  name: 'Godan verb - desire form',
  pattern: AllomorphPattern(
    patterns: {
      // う-verbs (買う → 買いたい); present in the negative desire rules and
      // needed here as well.
      'いたい': ['う'],
      'きたい': ['く'],
      'ぎたい': ['ぐ'],
      'したい': ['す'],
      'ちたい': ['つ'],
      'にたい': ['ぬ'],
      'びたい': ['ぶ'],
      'みたい': ['む'],
      'りたい': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative desire (`-たくない`) endings of godan verbs, one entry per
/// dictionary ending.
final LemmatizationRule godanVerbNegativeDesire = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - negative desire form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'いたくない': ['う'],
      'きたくない': ['く'],
      'ぎたくない': ['ぐ'],
      'したくない': ['す'],
      'ちたくない': ['つ'],
      'にたくない': ['ぬ'],
      'びたくない': ['ぶ'],
      'みたくない': ['む'],
      'りたくない': ['る'],
    },
  ),
);
|
||||
|
||||
/// Past desire (`-たかった`) endings of godan verbs: i-row kana + たかった,
/// one entry per dictionary ending.
final LemmatizationRule godanVerbPastDesire = LemmatizationRule(
  name: 'Godan verb - past desire form',
  pattern: AllomorphPattern(
    patterns: {
      // う-verbs (買う → 買いたかった); present in the negative past desire
      // rule and needed here as well.
      'いたかった': ['う'],
      'きたかった': ['く'],
      'ぎたかった': ['ぐ'],
      'したかった': ['す'],
      'ちたかった': ['つ'],
      'にたかった': ['ぬ'],
      'びたかった': ['ぶ'],
      'みたかった': ['む'],
      'りたかった': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||
|
||||
/// Negative past desire (`-たくなかった`) endings of godan verbs, one entry
/// per dictionary ending.
final LemmatizationRule godanVerbNegativePastDesire = LemmatizationRule(
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  name: 'Godan verb - negative past desire form',
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'いたくなかった': ['う'],
      'きたくなかった': ['く'],
      'ぎたくなかった': ['ぐ'],
      'したくなかった': ['す'],
      'ちたくなかった': ['つ'],
      'にたくなかった': ['ぬ'],
      'びたくなかった': ['ぶ'],
      'みたくなかった': ['む'],
      'りたくなかった': ['る'],
    },
  ),
);
|
||||
|
||||
/// Every godan verb rule declared in this file, collected in declaration
/// order into a list that cannot be modified.
final List<LemmatizationRule> godanVerbLemmatizationRules =
    List<LemmatizationRule>.unmodifiable(<LemmatizationRule>[
  // Plain forms
  godanVerbBase,
  godanVerbNegative,
  godanVerbPast,
  godanVerbTe,
  godanVerbTeiru,
  godanVerbTeita,
  godanVerbConditional,
  godanVerbVolitional,
  godanVerbPotential,
  godanVerbPassive,
  godanVerbCausative,
  godanVerbCausativePassive,
  godanVerbImperative,
  // Negative forms
  godanVerbNegativePast,
  godanVerbNegativeTe,
  godanVerbNegativeConditional,
  godanVerbNegativeVolitional,
  godanVerbNegativePotential,
  godanVerbNegativePassive,
  godanVerbNegativeCausative,
  godanVerbNegativeCausativePassive,
  godanVerbNegativeImperative,
  // Desire forms
  godanVerbDesire,
  godanVerbNegativeDesire,
  godanVerbPastDesire,
  godanVerbNegativePastDesire,
]);
|
||||
@@ -1,61 +0,0 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
|
||||
/// I-adjective rules: each replaces an inflected ending with the dictionary
/// ending `い`. Only the base-form rule is marked terminal.
List<LemmatizationRule> iAdjectiveLemmatizationRules = <LemmatizationRule>[
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    terminal: true,
    name: 'I adjective - base form',
    pattern: 'い',
    replacement: 'い',
  ),
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    name: 'I adjective - negative form',
    pattern: 'くない',
    replacement: 'い',
  ),
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    name: 'I adjective - past form',
    pattern: 'かった',
    replacement: 'い',
  ),
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    name: 'I adjective - negative past form',
    pattern: 'くなかった',
    replacement: 'い',
  ),
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    name: 'I adjective - te-form',
    pattern: 'くて',
    replacement: 'い',
  ),
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    name: 'I adjective - conditional form',
    pattern: 'ければ',
    replacement: 'い',
  ),
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    name: 'I adjective - volitional form',
    pattern: 'かろう',
    replacement: 'い',
  ),
  LemmatizationRule.simple(
    wordClass: WordClass.iAdjective,
    validChildClasses: [WordClass.iAdjective],
    name: 'I adjective - continuative form',
    pattern: 'く',
    replacement: 'い',
  ),
];
|
||||
77
lib/util/lemmatizer/rules/i_adjectives.dart
Normal file
77
lib/util/lemmatizer/rules/i_adjectives.dart
Normal file
@@ -0,0 +1,77 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
|
||||
/// Terminal rule for an i-adjective in dictionary form: the trailing `い`
/// is replaced by itself.
final LemmatizationRule iAdjectiveBase = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  terminal: true,
  name: 'I adjective - base form',
  pattern: 'い',
  replacement: 'い',
);
|
||||
|
||||
/// Negative i-adjective ending `くない`, replaced by `い`.
final LemmatizationRule iAdjectiveNegative = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  name: 'I adjective - negative form',
  pattern: 'くない',
  replacement: 'い',
);
|
||||
|
||||
/// Past i-adjective ending `かった`, replaced by `い`.
final LemmatizationRule iAdjectivePast = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  name: 'I adjective - past form',
  pattern: 'かった',
  replacement: 'い',
);
|
||||
|
||||
/// Negative past i-adjective ending `くなかった`, replaced by `い`.
final LemmatizationRule iAdjectiveNegativePast = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  name: 'I adjective - negative past form',
  pattern: 'くなかった',
  replacement: 'い',
);
|
||||
|
||||
/// Te-form i-adjective ending `くて`, replaced by `い`.
final LemmatizationRule iAdjectiveTe = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  name: 'I adjective - te-form',
  pattern: 'くて',
  replacement: 'い',
);
|
||||
|
||||
/// Conditional i-adjective ending `ければ`, replaced by `い`.
final LemmatizationRule iAdjectiveConditional = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  name: 'I adjective - conditional form',
  pattern: 'ければ',
  replacement: 'い',
);
|
||||
|
||||
/// Volitional i-adjective ending `かろう`, replaced by `い`.
final LemmatizationRule iAdjectiveVolitional = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  name: 'I adjective - volitional form',
  pattern: 'かろう',
  replacement: 'い',
);
|
||||
|
||||
/// Continuative i-adjective ending `く`, replaced by `い`.
final LemmatizationRule iAdjectiveContinuative = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  name: 'I adjective - continuative form',
  pattern: 'く',
  replacement: 'い',
);
|
||||
|
||||
/// Every i-adjective rule declared in this file, collected in declaration
/// order into a list that cannot be modified.
final List<LemmatizationRule> iAdjectiveLemmatizationRules =
    List<LemmatizationRule>.unmodifiable(<LemmatizationRule>[
  iAdjectiveBase,
  iAdjectiveNegative,
  iAdjectivePast,
  iAdjectiveNegativePast,
  iAdjectiveTe,
  iAdjectiveConditional,
  iAdjectiveVolitional,
  iAdjectiveContinuative,
]);
|
||||
@@ -1,241 +0,0 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
import 'package:jadb/util/text_filtering.dart';
|
||||
|
||||
/// Contexts handed to the ichidan rules as `lookAheadBehind`: a kanji (via
/// [kanjiRegex]) or a kana from the い/え rows.
///
/// NOTE(review): presumably these must directly precede the matched ending;
/// confirm against `LemmatizationRule.simple`.
List<Pattern> lookBehinds = [
  kanjiRegex,
  // い-row. The bare vowel covers stems such as 用い(る); the duplicated
  // 'び' entry has been dropped.
  'い',
  'き',
  'ぎ',
  'し',
  'じ',
  'ち',
  'ぢ',
  'に',
  'ひ',
  'び',
  'み',
  'り',
  // え-row. The bare vowel covers stems such as 考え(る).
  'え',
  'け',
  'げ',
  'せ',
  'ぜ',
  'て',
  'で',
  'ね',
  'へ',
  'べ',
  'め',
  'れ',
];
|
||||
|
||||
/// Ichidan verb rules: each replaces an inflected ending with the
/// dictionary ending `る`, subject to the shared [lookBehinds] contexts.
/// Only the base-form rule is marked terminal.
List<LemmatizationRule> ichidanVerbLemmatizationRules = [
  LemmatizationRule.simple(
    name: 'Ichidan verb - base form',
    terminal: true,
    pattern: 'る',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative form',
    pattern: 'ない',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - past form',
    pattern: 'た',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - te-form',
    pattern: 'て',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - te-form with いる',
    pattern: 'ている',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - te-form with いた',
    pattern: 'ていた',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - conditional form',
    pattern: 'れば',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - volitional form',
    pattern: 'よう',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  // Potential and passive share the surface ending られる, so both rules
  // carry the same pattern under different names.
  LemmatizationRule.simple(
    name: 'Ichidan verb - potential form',
    pattern: 'られる',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - passive form',
    pattern: 'られる',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - causative form',
    pattern: 'させる',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - causative passive form',
    pattern: 'させられる',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - imperative form',
    // Ichidan imperatives end in ろ (食べる → 食べろ); れ is the godan
    // る-verb imperative and does not belong in this table.
    pattern: 'ろ',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative past form',
    pattern: 'なかった',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative te-form',
    pattern: 'なくて',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative conditional form',
    pattern: 'なければ',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative volitional form',
    pattern: 'なかろう',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative potential form',
    pattern: 'られない',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative passive form',
    pattern: 'られない',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative causative form',
    pattern: 'させない',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative causative passive form',
    pattern: 'させられない',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative imperative form',
    pattern: 'るな',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - desire form',
    pattern: 'たい',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative desire form',
    pattern: 'たくない',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - past desire form',
    pattern: 'たかった',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  LemmatizationRule.simple(
    name: 'Ichidan verb - negative past desire form',
    pattern: 'たくなかった',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
];
|
||||
331
lib/util/lemmatizer/rules/ichidan_verbs.dart
Normal file
331
lib/util/lemmatizer/rules/ichidan_verbs.dart
Normal file
@@ -0,0 +1,331 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
import 'package:jadb/util/text_filtering.dart';
|
||||
|
||||
/// Patterns passed as `lookAheadBehind` to every ichidan rule in this file.
///
/// The list holds [kanjiRegex] followed by hiragana from the i-row and the
/// e-row, i.e. the sounds that precede the final る of an ichidan verb.
///
/// NOTE(review): い, え, ぴ and ぺ are not listed. Confirm whether verbs whose
/// stem ends in one of those kana (e.g. 考える, 用いる) are meant to be excluded
/// or whether the entries are simply missing.
final List<Pattern> _lookBehinds = [
  kanjiRegex,
  // i-row kana.
  'き',
  'ぎ',
  'し',
  'じ',
  'ち',
  'ぢ',
  'に',
  'ひ',
  'び',
  'み',
  'り',
  // e-row kana.
  'け',
  'げ',
  'せ',
  'ぜ',
  'て',
  'で',
  'ね',
  'へ',
  'べ',
  'め',
  'れ',
];
|
||||
|
||||
final LemmatizationRule ichidanVerbBase = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - base form',
|
||||
terminal: true,
|
||||
pattern: 'る',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegative = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative form',
|
||||
pattern: 'ない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
final LemmatizationRule ichidanVerbPast = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - past form',
|
||||
pattern: 'た',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbTe = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - te-form',
|
||||
pattern: 'て',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbTeiru = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - te-form with いる',
|
||||
pattern: 'ている',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbTeita = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - te-form with いた',
|
||||
pattern: 'ていた',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbConditional = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - conditional form',
|
||||
pattern: 'れば',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbVolitional = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - volitional form',
|
||||
pattern: 'よう',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbPotential = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - potential form',
|
||||
pattern: 'られる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbPassive = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - passive form',
|
||||
pattern: 'られる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbCausative = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - causative form',
|
||||
pattern: 'させる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbCausativePassive = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - causative passive form',
|
||||
pattern: 'させられる',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
/// Lemmatization rule for the plain imperative of ichidan verbs.
///
/// Ichidan verbs form their imperative by replacing the final る with ろ
/// (食べる -> 食べろ). The ending れ belongs to godan verbs ending in る
/// (走る -> 走れ), so it must not be used here.
///
/// NOTE(review): the written-style imperative in よ (食べよ) is not covered by
/// this rule; add a separate rule if it should be recognised.
final LemmatizationRule ichidanVerbImperative = LemmatizationRule.simple(
  name: 'Ichidan verb - imperative form',
  pattern: 'ろ',
  replacement: 'る',
  lookAheadBehind: _lookBehinds,
  validChildClasses: {WordClass.ichidanVerb},
  wordClass: WordClass.ichidanVerb,
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativePast = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative past form',
|
||||
pattern: 'なかった',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeTe = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative te-form',
|
||||
pattern: 'なくて',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeConditional =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative conditional form',
|
||||
pattern: 'なければ',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeConditionalVariant1 =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative conditional form (informal variant)',
|
||||
pattern: 'なきゃ',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeConditionalVariant2 =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative conditional form (informal variant)',
|
||||
pattern: 'なくちゃ',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeConditionalVariant3 =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative conditional form (informal variant)',
|
||||
pattern: 'ないと',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeVolitional =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative volitional form',
|
||||
pattern: 'なかろう',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativePotential = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative potential form',
|
||||
pattern: 'られない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativePassive = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative passive form',
|
||||
pattern: 'られない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeCausative = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative causative form',
|
||||
pattern: 'させない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeCausativePassive =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative causative passive form',
|
||||
pattern: 'させられない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeImperative =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative imperative form',
|
||||
pattern: 'るな',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbDesire = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - desire form',
|
||||
pattern: 'たい',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativeDesire = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative desire form',
|
||||
pattern: 'たくない',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbPastDesire = LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - past desire form',
|
||||
pattern: 'たかった',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final LemmatizationRule ichidanVerbNegativePastDesire =
|
||||
LemmatizationRule.simple(
|
||||
name: 'Ichidan verb - negative past desire form',
|
||||
pattern: 'たくなかった',
|
||||
replacement: 'る',
|
||||
lookAheadBehind: _lookBehinds,
|
||||
validChildClasses: {WordClass.ichidanVerb},
|
||||
wordClass: WordClass.ichidanVerb,
|
||||
);
|
||||
|
||||
final List<LemmatizationRule> ichidanVerbLemmatizationRules =
|
||||
List.unmodifiable([
|
||||
ichidanVerbBase,
|
||||
ichidanVerbNegative,
|
||||
ichidanVerbPast,
|
||||
ichidanVerbTe,
|
||||
ichidanVerbTeiru,
|
||||
ichidanVerbTeita,
|
||||
ichidanVerbConditional,
|
||||
ichidanVerbVolitional,
|
||||
ichidanVerbPotential,
|
||||
ichidanVerbPassive,
|
||||
ichidanVerbCausative,
|
||||
ichidanVerbCausativePassive,
|
||||
ichidanVerbImperative,
|
||||
ichidanVerbNegativePast,
|
||||
ichidanVerbNegativeTe,
|
||||
ichidanVerbNegativeConditional,
|
||||
ichidanVerbNegativeConditionalVariant1,
|
||||
ichidanVerbNegativeConditionalVariant2,
|
||||
ichidanVerbNegativeConditionalVariant3,
|
||||
ichidanVerbNegativeVolitional,
|
||||
ichidanVerbNegativePotential,
|
||||
ichidanVerbNegativePassive,
|
||||
ichidanVerbNegativeCausative,
|
||||
ichidanVerbNegativeCausativePassive,
|
||||
ichidanVerbNegativeImperative,
|
||||
ichidanVerbDesire,
|
||||
ichidanVerbNegativeDesire,
|
||||
ichidanVerbPastDesire,
|
||||
ichidanVerbNegativePastDesire,
|
||||
]);
|
||||
@@ -1,9 +1,9 @@
|
||||
// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb
|
||||
|
||||
const hiragana_syllabic_n = 'ん';
|
||||
const hiragana_small_tsu = 'っ';
|
||||
const hiraganaSyllabicN = 'ん';
|
||||
const hiraganaSmallTsu = 'っ';
|
||||
|
||||
const Map<String, String> hiragana_to_latin = {
|
||||
const Map<String, String> hiraganaToLatin = {
|
||||
'あ': 'a',
|
||||
'い': 'i',
|
||||
'う': 'u',
|
||||
@@ -209,7 +209,7 @@ const Map<String, String> hiragana_to_latin = {
|
||||
'ゟ': 'yori',
|
||||
};
|
||||
|
||||
const Map<String, String> latin_to_hiragana = {
|
||||
const Map<String, String> latinToHiragana = {
|
||||
'a': 'あ',
|
||||
'i': 'い',
|
||||
'u': 'う',
|
||||
@@ -481,12 +481,13 @@ const Map<String, String> latin_to_hiragana = {
|
||||
'#~': '〜',
|
||||
};
|
||||
|
||||
bool _smallTsu(String forConversion) => forConversion == hiragana_small_tsu;
|
||||
bool _smallTsu(String forConversion) => forConversion == hiraganaSmallTsu;
|
||||
bool _nFollowedByYuYeYo(String forConversion, String kana) =>
|
||||
forConversion == hiragana_syllabic_n &&
|
||||
forConversion == hiraganaSyllabicN &&
|
||||
kana.length > 1 &&
|
||||
'やゆよ'.contains(kana.substring(1, 2));
|
||||
|
||||
/// Transliterates a string of hiragana characters to Latin script (romaji).
|
||||
String transliterateHiraganaToLatin(String hiragana) {
|
||||
String kana = hiragana;
|
||||
String romaji = '';
|
||||
@@ -505,7 +506,7 @@ String transliterateHiraganaToLatin(String hiragana) {
|
||||
} else if (_nFollowedByYuYeYo(forConversion, kana)) {
|
||||
mora = "n'";
|
||||
}
|
||||
mora ??= hiragana_to_latin[forConversion];
|
||||
mora ??= hiraganaToLatin[forConversion];
|
||||
|
||||
if (mora != null) {
|
||||
if (geminate) {
|
||||
@@ -524,15 +525,61 @@ String transliterateHiraganaToLatin(String hiragana) {
|
||||
return romaji;
|
||||
}
|
||||
|
||||
/// Computes the alignment between [hiragana] and its romaji transliteration.
///
/// Returns a list of `(kanaIndex, romajiIndex)` records, one per converted
/// unit. `kanaIndex` is the offset of the unit in [hiragana] and
/// `romajiIndex` is the offset in the romaji output at which that unit's
/// transliteration starts.
///
/// A small tsu (っ) produces no record of its own. It doubles the first letter
/// of the following mora, and the record for that mora points just after the
/// doubled letter. Characters without a table entry are copied through
/// unchanged and still get a record.
List<(int, int)> transliterateHiraganaToLatinSpan(String hiragana) {
  String kana = hiragana;
  String romaji = '';
  final List<(int, int)> spans = [];
  bool geminate = false;
  int kanaIndex = 0;

  while (kana.isNotEmpty) {
    // Try the two-character digraph first, then fall back to one character.
    final lengths = [if (kana.length > 1) 2, 1];
    for (final length in lengths) {
      final String forConversion = kana.substring(0, length);
      String? mora;

      if (_smallTsu(forConversion)) {
        geminate = true;
        kana = kana.replaceRange(0, length, '');
        // The sokuon is consumed from the input, so the input offset has to
        // advance as well; otherwise every later span is shifted by one.
        kanaIndex += length;
        break;
      } else if (_nFollowedByYuYeYo(forConversion, kana)) {
        mora = "n'";
      }
      mora ??= hiraganaToLatin[forConversion];

      if (mora != null) {
        if (geminate) {
          // Emit the doubled consonant requested by the preceding small tsu.
          geminate = false;
          romaji += mora.substring(0, 1);
        }
        spans.add((kanaIndex, romaji.length));
        romaji += mora;
        kana = kana.replaceRange(0, length, '');
        kanaIndex += length;
        break;
      } else if (length == 1) {
        // No table entry even for a single character: pass it through as is.
        spans.add((kanaIndex, romaji.length));
        romaji += forConversion;
        kana = kana.replaceRange(0, length, '');
        kanaIndex += length;
      }
    }
  }
  return spans;
}
|
||||
|
||||
bool _doubleNFollowedByAIUEO(String forConversion) =>
|
||||
RegExp(r'^nn[aiueo]$').hasMatch(forConversion);
|
||||
bool _hasTableMatch(String forConversion) =>
|
||||
latin_to_hiragana[forConversion] != null;
|
||||
latinToHiragana[forConversion] != null;
|
||||
bool _hasDoubleConsonant(String forConversion, int length) =>
|
||||
forConversion == 'tch' ||
|
||||
(length == 2 &&
|
||||
RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(forConversion));
|
||||
|
||||
/// Transliterates a string of Latin script (romaji) to hiragana characters.
|
||||
String transliterateLatinToHiragana(String latin) {
|
||||
String romaji = latin
|
||||
.toLowerCase()
|
||||
@@ -549,12 +596,12 @@ String transliterateLatinToHiragana(String latin) {
|
||||
final String forConversion = romaji.substring(0, length);
|
||||
|
||||
if (_doubleNFollowedByAIUEO(forConversion)) {
|
||||
mora = hiragana_syllabic_n;
|
||||
mora = hiraganaSyllabicN;
|
||||
forRemoval = 1;
|
||||
} else if (_hasTableMatch(forConversion)) {
|
||||
mora = latin_to_hiragana[forConversion];
|
||||
mora = latinToHiragana[forConversion];
|
||||
} else if (_hasDoubleConsonant(forConversion, length)) {
|
||||
mora = hiragana_small_tsu;
|
||||
mora = hiraganaSmallTsu;
|
||||
forRemoval = 1;
|
||||
}
|
||||
|
||||
@@ -572,6 +619,53 @@ String transliterateLatinToHiragana(String latin) {
|
||||
return kana;
|
||||
}
|
||||
|
||||
/// Computes the alignment between [latin] (romaji) and its hiragana output.
///
/// Each record is `(latinIndex, kanaIndex)`: the offset of a converted chunk
/// in the lower-cased input (after `mb`/`mp` are rewritten to `nb`/`np`) and
/// the offset in the hiragana output where that chunk's result begins.
/// Chunks that cannot be converted are copied through one character at a
/// time and still receive a record.
List<(int, int)> transliterateLatinToHiraganaSpan(String latin) {
  final String normalized = latin
      .toLowerCase()
      .replaceAll('mb', 'nb')
      .replaceAll('mp', 'np');
  final List<(int, int)> result = [];

  // `cursor` walks the normalized input; `kanaLength` tracks how long the
  // hiragana output would be at this point.
  int cursor = 0;
  int kanaLength = 0;

  while (cursor < normalized.length) {
    final int remaining = normalized.length - cursor;
    final int longest = remaining < 3 ? remaining : 3;

    // Longest candidate first: three letters, then two, then one.
    for (int size = longest; size >= 1; size--) {
      final String chunk = normalized.substring(cursor, cursor + size);
      String? mora;
      int consumed = size;

      if (_doubleNFollowedByAIUEO(chunk)) {
        // "nn" + vowel: only the first n becomes the syllabic n.
        mora = hiraganaSyllabicN;
        consumed = 1;
      } else {
        mora = latinToHiragana[chunk];
        if (mora == null && _hasDoubleConsonant(chunk, size)) {
          // Doubled consonant: the first letter becomes a small tsu.
          mora = hiraganaSmallTsu;
          consumed = 1;
        }
      }

      // A single unconvertible character is emitted verbatim.
      final String? emitted = mora ?? (size == 1 ? chunk : null);
      if (emitted == null) {
        continue;
      }

      result.add((cursor, kanaLength));
      kanaLength += emitted.length;
      cursor += consumed;
      break;
    }
  }

  return result;
}
|
||||
|
||||
String _transposeCodepointsInRange(
|
||||
String text,
|
||||
int distance,
|
||||
@@ -583,15 +677,19 @@ String _transposeCodepointsInRange(
|
||||
),
|
||||
);
|
||||
|
||||
/// Transliterates a string of kana characters (hiragana or katakana) to Latin script (romaji).
|
||||
String transliterateKanaToLatin(String kana) =>
|
||||
transliterateHiraganaToLatin(transliterateKatakanaToHiragana(kana));
|
||||
|
||||
/// Transliterates a string of Latin script (romaji) to katakana characters.
|
||||
String transliterateLatinToKatakana(String latin) =>
|
||||
transliterateHiraganaToKatakana(transliterateLatinToHiragana(latin));
|
||||
|
||||
/// Transliterates a string of katakana characters to hiragana characters.
|
||||
String transliterateKatakanaToHiragana(String katakana) =>
|
||||
_transposeCodepointsInRange(katakana, -96, 12449, 12534);
|
||||
|
||||
/// Transliterates a string of hiragana characters to katakana characters.
|
||||
String transliterateHiraganaToKatakana(String hiragana) =>
|
||||
_transposeCodepointsInRange(hiragana, 96, 12353, 12438);
|
||||
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
CREATE TABLE "JMdict_Version" (
|
||||
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||
"date" DATE NOT NULL,
|
||||
"hash" VARCHAR(64) NOT NULL
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TRIGGER "JMdict_Version_SingleRow"
|
||||
BEFORE INSERT ON "JMdict_Version"
|
||||
WHEN (SELECT COUNT(*) FROM "JMdict_Version") >= 1
|
||||
BEGIN
|
||||
SELECT RAISE(FAIL, 'Only one row allowed in JMdict_Version');
|
||||
END;
|
||||
|
||||
CREATE TABLE "JMdict_InfoDialect" (
|
||||
"id" VARCHAR(4) PRIMARY KEY NOT NULL,
|
||||
"description" TEXT NOT NULL
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
CREATE TABLE "JMdict_JLPT_Version" (
|
||||
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||
"date" DATE NOT NULL,
|
||||
"hash" VARCHAR(64) NOT NULL
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TRIGGER "JMdict_JLPT_Version_SingleRow"
|
||||
BEFORE INSERT ON "JMdict_JLPT_Version"
|
||||
WHEN (SELECT COUNT(*) FROM "JMdict_JLPT_Version") >= 1
|
||||
BEGIN
|
||||
SELECT RAISE(FAIL, 'Only one row allowed in JMdict_JLPT_Version');
|
||||
END;
|
||||
|
||||
CREATE TABLE "JMdict_JLPTTag" (
|
||||
"entryId" INTEGER NOT NULL,
|
||||
"jlptLevel" CHAR(2) NOT NULL CHECK ("jlptLevel" in ('N5', 'N4', 'N3', 'N2', 'N1')),
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
CREATE TABLE "RADKFILE_Version" (
|
||||
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||
"date" DATE NOT NULL,
|
||||
"hash" VARCHAR(64) NOT NULL
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TRIGGER "RADKFILE_Version_SingleRow"
|
||||
BEFORE INSERT ON "RADKFILE_Version"
|
||||
WHEN (SELECT COUNT(*) FROM "RADKFILE_Version") >= 1
|
||||
BEGIN
|
||||
SELECT RAISE(FAIL, 'Only one row allowed in RADKFILE_Version');
|
||||
END;
|
||||
|
||||
CREATE TABLE "RADKFILE" (
|
||||
"kanji" CHAR(1) NOT NULL,
|
||||
"radical" CHAR(1) NOT NULL,
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
CREATE TABLE "KANJIDIC_Version" (
|
||||
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||
"date" DATE NOT NULL,
|
||||
"hash" VARCHAR(64) NOT NULL
|
||||
) WITHOUT ROWID;
|
||||
|
||||
CREATE TRIGGER "KANJIDIC_Version_SingleRow"
|
||||
BEFORE INSERT ON "KANJIDIC_Version"
|
||||
WHEN (SELECT COUNT(*) FROM "KANJIDIC_Version") >= 1
|
||||
BEGIN
|
||||
SELECT RAISE(FAIL, 'Only one row allowed in KANJIDIC_Version');
|
||||
END;
|
||||
|
||||
CREATE TABLE "KANJIDIC_Character" (
|
||||
"literal" CHAR(1) NOT NULL PRIMARY KEY,
|
||||
"grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10),
|
||||
|
||||
@@ -7,6 +7,29 @@ buildDartApplication {
|
||||
version = "1.0.0";
|
||||
inherit src;
|
||||
|
||||
dartEntryPoints."bin/jadb" = "bin/jadb.dart";
|
||||
|
||||
# NOTE: the default dart hooks are using `dart compile`, which is not able to call the
|
||||
# new dart build hooks required to use package:sqlite3 >= 3.0.0. So we override
|
||||
# these phases to use `dart build` instead.
|
||||
buildPhase = ''
|
||||
runHook preBuild
|
||||
|
||||
mkdir -p "$out/bin"
|
||||
dart build cli --target "bin/jadb.dart"
|
||||
|
||||
runHook postBuild
|
||||
'';
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
|
||||
mkdir -p "$out"
|
||||
mv build/cli/*/bundle/* "$out/"
|
||||
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
autoPubspecLock = ../pubspec.lock;
|
||||
|
||||
meta.mainProgram = "jadb";
|
||||
|
||||
114
pubspec.lock
114
pubspec.lock
@@ -5,18 +5,18 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: _fe_analyzer_shared
|
||||
sha256: da0d9209ca76bde579f2da330aeb9df62b6319c834fa7baae052021b0462401f
|
||||
sha256: "3b19a47f6ea7c2632760777c78174f47f6aec1e05f0cd611380d4593b8af1dbc"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "85.0.0"
|
||||
version: "96.0.0"
|
||||
analyzer:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: analyzer
|
||||
sha256: b1ade5707ab7a90dfd519eaac78a7184341d19adb6096c68d499b59c7c6cf880
|
||||
sha256: "0c516bc4ad36a1a75759e54d5047cb9d15cded4459df01aa35a0b5ec7db2c2a0"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "7.7.0"
|
||||
version: "10.2.0"
|
||||
args:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
@@ -49,6 +49,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.2.0"
|
||||
code_assets:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: code_assets
|
||||
sha256: "83ccdaa064c980b5596c35dd64a8d3ecc68620174ab9b90b6343b753aa721687"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.0"
|
||||
collection:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
@@ -77,34 +85,34 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: crypto
|
||||
sha256: "1e445881f28f22d6140f181e07737b22f1e099a5e1ff94b0af2f9e4a463f4855"
|
||||
sha256: c8ea0233063ba03258fbcf2ca4d6dadfefe14f02fab57702265467a19f27fadf
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.0.6"
|
||||
version: "3.0.7"
|
||||
csv:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: csv
|
||||
sha256: c6aa2679b2a18cb57652920f674488d89712efaf4d3fdf2e537215b35fc19d6c
|
||||
sha256: bef2950f7a753eb82f894a2eabc3072e73cf21c17096296a5a992797e50b1d0d
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.0.0"
|
||||
version: "7.1.0"
|
||||
equatable:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: equatable
|
||||
sha256: "567c64b3cb4cf82397aac55f4f0cbd3ca20d77c6c03bedbc4ceaddc08904aef7"
|
||||
sha256: "3e0141505477fd8ad55d6eb4e7776d3fe8430be8e497ccb1521370c3f21a3e2b"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.0.7"
|
||||
version: "2.0.8"
|
||||
ffi:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: ffi
|
||||
sha256: "289279317b4b16eb2bb7e271abccd4bf84ec9bdcbe999e278a94b804f5630418"
|
||||
sha256: "6d7fd89431262d8f3125e81b50d3847a091d846eafcd4fdb88dd06f36d705a45"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.1.4"
|
||||
version: "2.2.0"
|
||||
file:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -129,6 +137,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.1.3"
|
||||
hooks:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: hooks
|
||||
sha256: "7a08a0d684cb3b8fb604b78455d5d352f502b68079f7b80b831c62220ab0a4f6"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.1"
|
||||
http_multi_server:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -153,22 +169,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.5"
|
||||
js:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: js
|
||||
sha256: "53385261521cc4a0c4658fd0ad07a7d14591cf8fc33abbceae306ddb974888dc"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.7.2"
|
||||
lints:
|
||||
dependency: "direct dev"
|
||||
description:
|
||||
name: lints
|
||||
sha256: a5e2b223cb7c9c8efdc663ef484fdd95bb243bff242ef5b13e26883547fce9a0
|
||||
sha256: "12f842a479589fea194fe5c5a3095abc7be0c1f2ddfa9a0e76aed1dbd26a87df"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.0.0"
|
||||
version: "6.1.0"
|
||||
logging:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -181,18 +189,18 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: matcher
|
||||
sha256: dc58c723c3c24bf8d3e2d3ad3f2f9d7bd9cf43ec6feaa64181775e60190153f2
|
||||
sha256: "12956d0ad8390bbcc63ca2e1469c0619946ccb52809807067a7020d57e647aa6"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.12.17"
|
||||
version: "0.12.18"
|
||||
meta:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: meta
|
||||
sha256: "23f08335362185a5ea2ad3a4e597f1375e78bce8a040df5c600c8d3552ef2394"
|
||||
sha256: "9f29b9bcc8ee287b1a31e0d01be0eae99a930dbffdaecf04b3f3d82a969f296f"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.17.0"
|
||||
version: "1.18.1"
|
||||
mime:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -201,6 +209,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.0.0"
|
||||
native_toolchain_c:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: native_toolchain_c
|
||||
sha256: "89e83885ba09da5fdf2cdacc8002a712ca238c28b7f717910b34bcd27b0d03ac"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.17.4"
|
||||
node_preamble:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -229,18 +245,18 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: petitparser
|
||||
sha256: "9436fe11f82d7cc1642a8671e5aa4149ffa9ae9116e6cf6dd665fc0653e3825c"
|
||||
sha256: "91bd59303e9f769f108f8df05e371341b15d59e995e6806aefab827b58336675"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "7.0.0"
|
||||
version: "7.0.2"
|
||||
pool:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: pool
|
||||
sha256: "20fe868b6314b322ea036ba325e6fc0711a22948856475e2c2b6306e8ab39c2a"
|
||||
sha256: "978783255c543aa3586a1b3c21f6e9d720eb315376a915872c61ef8b5c20177d"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.5.1"
|
||||
version: "1.5.2"
|
||||
pub_semver:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -301,34 +317,34 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: source_span
|
||||
sha256: "254ee5351d6cb365c859e20ee823c3bb479bf4a293c22d17a9f1bf144ce86f7c"
|
||||
sha256: "56a02f1f4cd1a2d96303c0144c93bd6d909eea6bee6bf5a0e0b685edbd4c47ab"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.10.1"
|
||||
version: "1.10.2"
|
||||
sqflite_common:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: sqflite_common
|
||||
sha256: "84731e8bfd8303a3389903e01fb2141b6e59b5973cacbb0929021df08dddbe8b"
|
||||
sha256: "6ef422a4525ecc601db6c0a2233ff448c731307906e92cabc9ba292afaae16a6"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.5.5"
|
||||
version: "2.5.6"
|
||||
sqflite_common_ffi:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: sqflite_common_ffi
|
||||
sha256: "9faa2fedc5385ef238ce772589f7718c24cdddd27419b609bb9c6f703ea27988"
|
||||
sha256: c59fcdc143839a77581f7a7c4de018e53682408903a0a0800b95ef2dc4033eff
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.3.6"
|
||||
version: "2.4.0+2"
|
||||
sqlite3:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: sqlite3
|
||||
sha256: "608b56d594e4c8498c972c8f1507209f9fd74939971b948ddbbfbfd1c9cb3c15"
|
||||
sha256: b7cf6b37667f6a921281797d2499ffc60fb878b161058d422064f0ddc78f6aa6
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.7.7"
|
||||
version: "3.1.6"
|
||||
stack_trace:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -373,26 +389,26 @@ packages:
|
||||
dependency: "direct dev"
|
||||
description:
|
||||
name: test
|
||||
sha256: "65e29d831719be0591f7b3b1a32a3cda258ec98c58c7b25f7b84241bc31215bb"
|
||||
sha256: "54c516bbb7cee2754d327ad4fca637f78abfc3cbcc5ace83b3eda117e42cd71a"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.26.2"
|
||||
version: "1.29.0"
|
||||
test_api:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: test_api
|
||||
sha256: "522f00f556e73044315fa4585ec3270f1808a4b186c936e612cab0b565ff1e00"
|
||||
sha256: "93167629bfc610f71560ab9312acdda4959de4df6fac7492c89ff0d3886f6636"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.7.6"
|
||||
version: "0.7.9"
|
||||
test_core:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: test_core
|
||||
sha256: "80bf5a02b60af04b09e14f6fe68b921aad119493e26e490deaca5993fef1b05a"
|
||||
sha256: "394f07d21f0f2255ec9e3989f21e54d3c7dc0e6e9dbce160e5a9c1a6be0e2943"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.6.11"
|
||||
version: "0.6.15"
|
||||
typed_data:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -413,10 +429,10 @@ packages:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: watcher
|
||||
sha256: "0b7fd4a0bbc4b92641dbf20adfd7e3fd1398fe17102d94b674234563e110088a"
|
||||
sha256: "1398c9f081a753f9226febe8900fce8f7d0a67163334e1c94a2438339d79d635"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.2"
|
||||
version: "1.2.1"
|
||||
web:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -453,10 +469,10 @@ packages:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: xml
|
||||
sha256: "3202a47961c1a0af6097c9f8c1b492d705248ba309e6f7a72410422c05046851"
|
||||
sha256: "971043b3a0d3da28727e40ed3e0b5d18b742fa5a68665cca88e74b7876d5e025"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.6.0"
|
||||
version: "6.6.1"
|
||||
yaml:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -466,4 +482,4 @@ packages:
|
||||
source: hosted
|
||||
version: "3.1.3"
|
||||
sdks:
|
||||
dart: ">=3.8.0 <4.0.0"
|
||||
dart: ">=3.10.1 <4.0.0"
|
||||
|
||||
11
pubspec.yaml
11
pubspec.yaml
@@ -4,17 +4,17 @@ version: 1.0.0
|
||||
homepage: https://git.pvv.ntnu.no/oysteikt/jadb
|
||||
|
||||
environment:
|
||||
sdk: '^3.8.0'
|
||||
sdk: '^3.9.0'
|
||||
|
||||
dependencies:
|
||||
args: ^2.7.0
|
||||
collection: ^1.19.0
|
||||
csv: ^6.0.0
|
||||
csv: ^7.1.0
|
||||
equatable: ^2.0.0
|
||||
path: ^1.9.1
|
||||
sqflite_common: ^2.5.0
|
||||
sqflite_common_ffi: ^2.3.0
|
||||
sqlite3: ^2.7.7
|
||||
sqlite3: ^3.1.6
|
||||
xml: ^6.5.0
|
||||
|
||||
dev_dependencies:
|
||||
@@ -24,6 +24,11 @@ dev_dependencies:
|
||||
executables:
|
||||
jadb: jadb
|
||||
|
||||
hooks:
|
||||
user_defines:
|
||||
sqlite3:
|
||||
source: system
|
||||
|
||||
topics:
|
||||
- database
|
||||
- dictionary
|
||||
|
||||
21
test/const_data/kanji_grades.dart
Normal file
21
test/const_data/kanji_grades.dart
Normal file
@@ -0,0 +1,21 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/const_data/kanji_grades.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('All constant kanji in jouyouKanjiByGrades are 2136 in total', () {
|
||||
expect(jouyouKanjiByGrades.values.flattenedToSet.length, 2136);
|
||||
});
|
||||
|
||||
// test('All constant kanji in jouyouKanjiByGrades are present in KANJIDIC2', () {
|
||||
|
||||
// });
|
||||
|
||||
// test('All constant kanji in jouyouKanjiByGrades have matching grade as in KANJIDIC2', () {
|
||||
|
||||
// });
|
||||
|
||||
// test('All constant kanji in jouyouKanjiByGradesAndStrokeCount have matching stroke count as in KANJIDIC2', () {
|
||||
|
||||
// });
|
||||
}
|
||||
17
test/const_data/radicals_test.dart
Normal file
17
test/const_data/radicals_test.dart
Normal file
@@ -0,0 +1,17 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/const_data/radicals.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('All constant radicals are 253 in total', () {
|
||||
expect(radicals.values.flattenedToSet.length, 253);
|
||||
});
|
||||
|
||||
// test('All constant radicals have at least 1 associated kanji in KANJIDIC2', () {
|
||||
|
||||
// });
|
||||
|
||||
// test('All constant radicals match the stroke order listed in KANJIDIC2', () {
|
||||
|
||||
// });
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
import 'package:collection/collection.dart';
|
||||
import 'package:jadb/const_data/kanji_grades.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('Assert 2136 kanji in jouyou set', () {
|
||||
expect(JOUYOU_KANJI_BY_GRADES.values.flattenedToSet.length, 2136);
|
||||
});
|
||||
}
|
||||
@@ -1,30 +1,20 @@
|
||||
import 'dart:ffi';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:jadb/models/create_empty_db.dart';
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
|
||||
import 'package:sqlite3/open.dart';
|
||||
// import 'package:sqlite3/open.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
Future<DatabaseExecutor> setup_inmemory_database() async {
|
||||
final libsqlitePath = Platform.environment['LIBSQLITE_PATH'];
|
||||
|
||||
if (libsqlitePath == null) {
|
||||
throw Exception('LIBSQLITE_PATH is not set');
|
||||
}
|
||||
|
||||
final dbConnection = await createDatabaseFactoryFfi(
|
||||
ffiInit: () =>
|
||||
open.overrideForAll(() => DynamicLibrary.open(libsqlitePath)),
|
||||
).openDatabase(':memory:');
|
||||
Future<DatabaseExecutor> setupInMemoryDatabase() async {
|
||||
final dbConnection = await createDatabaseFactoryFfi().openDatabase(
|
||||
':memory:',
|
||||
);
|
||||
|
||||
return dbConnection;
|
||||
}
|
||||
|
||||
void main() {
|
||||
test('Create empty db', () async {
|
||||
final db = await setup_inmemory_database();
|
||||
final db = await setupInMemoryDatabase();
|
||||
|
||||
await createEmptyDb(db);
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import 'setup_database_connection.dart';
|
||||
|
||||
void main() {
|
||||
test('Filter kanji', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
|
||||
final result = await connection.filterKanji([
|
||||
'a',
|
||||
@@ -26,4 +26,27 @@ void main() {
|
||||
|
||||
expect(result.join(), '漢字地字');
|
||||
});
|
||||
|
||||
test('Filter kanji - deduplicate', () async {
|
||||
final connection = await setupDatabaseConnection();
|
||||
|
||||
final result = await connection.filterKanji([
|
||||
'a',
|
||||
'b',
|
||||
'c',
|
||||
'漢',
|
||||
'字',
|
||||
'地',
|
||||
'字',
|
||||
'か',
|
||||
'な',
|
||||
'.',
|
||||
'!',
|
||||
'@',
|
||||
';',
|
||||
'々',
|
||||
], deduplicate: true);
|
||||
|
||||
expect(result.join(), '漢字地');
|
||||
});
|
||||
}
|
||||
|
||||
@@ -6,16 +6,16 @@ import 'setup_database_connection.dart';
|
||||
|
||||
void main() {
|
||||
test('Search a kanji', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
|
||||
final result = await connection.jadbSearchKanji('漢');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
group('Search all jouyou kanji', () {
|
||||
JOUYOU_KANJI_BY_GRADES.forEach((grade, characters) {
|
||||
jouyouKanjiByGrades.forEach((grade, characters) {
|
||||
test('Search all kanji in grade $grade', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
|
||||
for (final character in characters) {
|
||||
final result = await connection.jadbSearchKanji(character);
|
||||
|
||||
257
test/search/search_match_inference_test.dart
Normal file
257
test/search/search_match_inference_test.dart
Normal file
@@ -0,0 +1,257 @@
|
||||
import 'package:jadb/models/common/jlpt_level.dart';
|
||||
import 'package:jadb/models/word_search/word_search_match_span.dart';
|
||||
import 'package:jadb/models/word_search/word_search_result.dart';
|
||||
import 'package:jadb/models/word_search/word_search_ruby.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('Infer match whole word', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: '仮名')],
|
||||
senses: [],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('仮名');
|
||||
|
||||
expect(wordSearchResult.matchSpans, [
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.kanji,
|
||||
start: 0,
|
||||
end: 2,
|
||||
index: 0,
|
||||
),
|
||||
]);
|
||||
});
|
||||
|
||||
test('Infer match part of word', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: '仮名')],
|
||||
senses: [],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('仮');
|
||||
|
||||
expect(wordSearchResult.matchSpans, [
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.kanji,
|
||||
start: 0,
|
||||
end: 1,
|
||||
index: 0,
|
||||
),
|
||||
]);
|
||||
});
|
||||
|
||||
test('Infer match in middle of word', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: 'ありがとう')],
|
||||
senses: [],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('りがと');
|
||||
|
||||
expect(wordSearchResult.matchSpans, [
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.kanji,
|
||||
start: 1,
|
||||
end: 4,
|
||||
index: 0,
|
||||
),
|
||||
]);
|
||||
});
|
||||
|
||||
test('Infer match in furigana', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
|
||||
senses: [],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('かな');
|
||||
|
||||
expect(wordSearchResult.matchSpans, [
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.kana,
|
||||
start: 0,
|
||||
end: 2,
|
||||
index: 0,
|
||||
),
|
||||
]);
|
||||
});
|
||||
|
||||
test('Infer match in sense', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: '仮名')],
|
||||
senses: [
|
||||
WordSearchSense(
|
||||
antonyms: [],
|
||||
dialects: [],
|
||||
englishDefinitions: ['kana'],
|
||||
fields: [],
|
||||
info: [],
|
||||
languageSource: [],
|
||||
misc: [],
|
||||
partsOfSpeech: [],
|
||||
restrictedToKanji: [],
|
||||
restrictedToReading: [],
|
||||
seeAlso: [],
|
||||
),
|
||||
],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('kana');
|
||||
|
||||
expect(wordSearchResult.matchSpans, [
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.sense,
|
||||
start: 0,
|
||||
end: 4,
|
||||
index: 0,
|
||||
),
|
||||
]);
|
||||
});
|
||||
|
||||
test('Infer multiple matches', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
|
||||
senses: [
|
||||
WordSearchSense(
|
||||
antonyms: [],
|
||||
dialects: [],
|
||||
englishDefinitions: ['kana', 'the kana'],
|
||||
fields: [],
|
||||
info: [],
|
||||
languageSource: [],
|
||||
misc: [],
|
||||
partsOfSpeech: [],
|
||||
restrictedToKanji: [],
|
||||
restrictedToReading: [],
|
||||
seeAlso: [],
|
||||
),
|
||||
],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('kana');
|
||||
|
||||
expect(wordSearchResult.matchSpans, [
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.sense,
|
||||
start: 0,
|
||||
end: 4,
|
||||
index: 0,
|
||||
),
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.sense,
|
||||
start: 4,
|
||||
end: 8,
|
||||
index: 0,
|
||||
subIndex: 1,
|
||||
),
|
||||
]);
|
||||
});
|
||||
|
||||
test('Infer match with no matches', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
|
||||
senses: [
|
||||
WordSearchSense(
|
||||
antonyms: [],
|
||||
dialects: [],
|
||||
englishDefinitions: ['kana'],
|
||||
fields: [],
|
||||
info: [],
|
||||
languageSource: [],
|
||||
misc: [],
|
||||
partsOfSpeech: [],
|
||||
restrictedToKanji: [],
|
||||
restrictedToReading: [],
|
||||
seeAlso: [],
|
||||
),
|
||||
],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('xyz');
|
||||
|
||||
expect(wordSearchResult.matchSpans, isEmpty);
|
||||
});
|
||||
|
||||
test('Infer multiple matches of same substring', () {
|
||||
final wordSearchResult = WordSearchResult(
|
||||
entryId: 0,
|
||||
score: 0,
|
||||
isCommon: false,
|
||||
jlptLevel: JlptLevel.none,
|
||||
kanjiInfo: {},
|
||||
readingInfo: {},
|
||||
japanese: [WordSearchRuby(base: 'ああ')],
|
||||
senses: [],
|
||||
sources: WordSearchSources.empty(),
|
||||
);
|
||||
|
||||
wordSearchResult.inferMatchSpans('あ');
|
||||
|
||||
expect(wordSearchResult.matchSpans, [
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.kanji,
|
||||
start: 0,
|
||||
end: 1,
|
||||
index: 0,
|
||||
),
|
||||
WordSearchMatchSpan(
|
||||
spanType: WordSearchMatchSpanType.kanji,
|
||||
start: 1,
|
||||
end: 2,
|
||||
index: 0,
|
||||
),
|
||||
]);
|
||||
});
|
||||
}
|
||||
@@ -3,7 +3,7 @@ import 'dart:io';
|
||||
import 'package:jadb/_data_ingestion/open_local_db.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
Future<Database> setup_database_connection() async {
|
||||
Future<Database> setupDatabaseConnection() async {
|
||||
final libSqlitePath = Platform.environment['LIBSQLITE_PATH'];
|
||||
final jadbPath = Platform.environment['JADB_PATH'];
|
||||
|
||||
|
||||
@@ -5,43 +5,43 @@ import 'setup_database_connection.dart';
|
||||
|
||||
void main() {
|
||||
test('Search a word - english - auto', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
final result = await connection.jadbSearchWord('kana');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test('Get word search count - english - auto', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
final result = await connection.jadbSearchWordCount('kana');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test('Search a word - japanese kana - auto', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
final result = await connection.jadbSearchWord('かな');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test('Get word search count - japanese kana - auto', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
final result = await connection.jadbSearchWordCount('かな');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test('Search a word - japanese kanji - auto', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
final result = await connection.jadbSearchWord('仮名');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test('Get word search count - japanese kanji - auto', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
final result = await connection.jadbSearchWordCount('仮名');
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
|
||||
test('Get a word by id', () async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
final result = await connection.jadbGetWordById(1577090);
|
||||
expect(result, isNotNull);
|
||||
});
|
||||
@@ -49,7 +49,7 @@ void main() {
|
||||
test(
|
||||
'Serialize all words',
|
||||
() async {
|
||||
final connection = await setup_database_connection();
|
||||
final connection = await setupDatabaseConnection();
|
||||
|
||||
// Test serializing all words
|
||||
for (final letter in 'aiueoksthnmyrw'.split('')) {
|
||||
|
||||
51
test/util/lemmatizer/lemmatizer_test.dart
Normal file
51
test/util/lemmatizer/lemmatizer_test.dart
Normal file
@@ -0,0 +1,51 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
const List<String> ichidanVerbs = [
|
||||
'食べる',
|
||||
'食べた',
|
||||
'食べさせられた',
|
||||
'食べたい',
|
||||
'食べたくない',
|
||||
'食べたくなかった',
|
||||
];
|
||||
const List<String> godanVerbs = [
|
||||
'泳ぐ',
|
||||
'泳いだ',
|
||||
'泳げる',
|
||||
// '泳げれた',
|
||||
];
|
||||
|
||||
bool findRuleRecursively(Lemmatized result, LemmatizationRule expectedRule) {
|
||||
if (result.rule == expectedRule) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (final c in result.children) {
|
||||
if (findRuleRecursively(c, expectedRule)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void main() {
|
||||
group('Lemmatize Ichidan Verbs', () {
|
||||
for (final v in ichidanVerbs) {
|
||||
test('Lemmatize Ichidan Verb $v', () {
|
||||
expect(findRuleRecursively(lemmatize(v), ichidanVerbBase), true);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
group('Lemmatize Godan Verbs', () {
|
||||
for (final v in godanVerbs) {
|
||||
test('Lemmatize Godan Verb $v', () {
|
||||
expect(findRuleRecursively(lemmatize(v), godanVerbBase), true);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
14
test/util/lemmatizer/rules/godan_verbs_test.dart
Normal file
14
test/util/lemmatizer/rules/godan_verbs_test.dart
Normal file
@@ -0,0 +1,14 @@
|
||||
import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('Test Godan Verb Base Rule', () {
|
||||
expect(godanVerbBase.matches('泳ぐ'), true);
|
||||
expect(godanVerbBase.apply('泳ぐ'), ['泳ぐ']);
|
||||
});
|
||||
|
||||
test('Test Godan Verb Negative Rule', () {
|
||||
expect(godanVerbNegative.matches('泳がない'), true);
|
||||
expect(godanVerbNegative.apply('泳がない'), ['泳ぐ']);
|
||||
});
|
||||
}
|
||||
15
test/util/lemmatizer/rules/i_adjectives_test.dart
Normal file
15
test/util/lemmatizer/rules/i_adjectives_test.dart
Normal file
@@ -0,0 +1,15 @@
|
||||
import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('Test i-adjective Base Rule', () {
|
||||
expect(iAdjectiveBase.matches('怪しい'), true);
|
||||
expect(iAdjectiveBase.apply('怪しい'), ['怪しい']);
|
||||
});
|
||||
|
||||
|
||||
test('Test i-adjective Negative Rule', () {
|
||||
expect(iAdjectiveNegative.matches('怪しくない'), true);
|
||||
expect(iAdjectiveNegative.apply('怪しくない'), ['怪しい']);
|
||||
});
|
||||
}
|
||||
14
test/util/lemmatizer/rules/ichidan_verbs_test.dart
Normal file
14
test/util/lemmatizer/rules/ichidan_verbs_test.dart
Normal file
@@ -0,0 +1,14 @@
|
||||
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('Test Ichidan Verb Base Rule', () {
|
||||
expect(ichidanVerbBase.matches('食べる'), true);
|
||||
expect(ichidanVerbBase.apply('食べる'), ['食べる']);
|
||||
});
|
||||
|
||||
test('Test Ichidan Verb Negative Rule', () {
|
||||
expect(ichidanVerbNegative.matches('食べない'), true);
|
||||
expect(ichidanVerbNegative.apply('食べない'), ['食べる']);
|
||||
});
|
||||
}
|
||||
15
test/util/lemmatizer/rules_test.dart
Normal file
15
test/util/lemmatizer/rules_test.dart
Normal file
@@ -0,0 +1,15 @@
|
||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||
import 'package:jadb/util/lemmatizer/rules.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
void main() {
|
||||
test('Assert lemmatizerRulesByWordClass is correct', () {
|
||||
for (final entry in lemmatizationRulesByWordClass.entries) {
|
||||
final WordClass wordClass = entry.key;
|
||||
final List<LemmatizationRule> rules = entry.value;
|
||||
for (final LemmatizationRule rule in rules) {
|
||||
expect(wordClass, rule.wordClass);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -37,6 +37,35 @@ void main() {
|
||||
});
|
||||
});
|
||||
|
||||
group('Romaji -> Hiragana Spans', () {
|
||||
void Function() expectSpans(String input, List<String> expected) => () {
|
||||
final result = transliterateLatinToHiraganaSpan(input);
|
||||
final trans = transliterateLatinToHiragana(input);
|
||||
for (int i = 0; i < result.length; i++) {
|
||||
expect(
|
||||
trans.substring(
|
||||
result[i].$2,
|
||||
i == result.length - 1 ? trans.length : result[i + 1].$2,
|
||||
),
|
||||
expected[i],
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
test('Basic test', expectSpans('katamari', ['か', 'た', 'ま', 'り']));
|
||||
test(
|
||||
'Basic test with diacritics',
|
||||
expectSpans('gadamari', ['が', 'だ', 'ま', 'り']),
|
||||
);
|
||||
test('wi and we', expectSpans('wiwe', ['うぃ', 'うぇ']));
|
||||
test('nb = mb', expectSpans('kanpai', ['か', 'ん', 'ぱ', 'い']));
|
||||
test('nb = mb', expectSpans('kampai', ['か', 'ん', 'ぱ', 'い']));
|
||||
test('Double n', expectSpans('konnichiha', ['こ', 'ん', 'に', 'ち', 'は']));
|
||||
|
||||
// TODO: fix the implementation
|
||||
// test('Double consonant', expectSpans('kappa', ['か', 'っぱ']));
|
||||
});
|
||||
|
||||
group('Hiragana -> Romaji', () {
|
||||
test('Basic test', () {
|
||||
final result = transliterateHiraganaToLatin('かたまり');
|
||||
@@ -63,4 +92,31 @@ void main() {
|
||||
expect(result, 'kappa');
|
||||
});
|
||||
});
|
||||
|
||||
group('Hiragana -> Romaji Spans', () {
|
||||
void Function() expectSpans(String input, List<String> expected) => () {
|
||||
final result = transliterateHiraganaToLatinSpan(input);
|
||||
final trans = transliterateHiraganaToLatin(input);
|
||||
for (int i = 0; i < result.length; i++) {
|
||||
expect(
|
||||
trans.substring(
|
||||
result[i].$2,
|
||||
i == result.length - 1 ? trans.length : result[i + 1].$2,
|
||||
),
|
||||
expected[i],
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
test('Basic test', expectSpans('かたまり', ['ka', 'ta', 'ma', 'ri']));
|
||||
test(
|
||||
'Basic test with diacritics',
|
||||
expectSpans('がだまり', ['ga', 'da', 'ma', 'ri']),
|
||||
);
|
||||
test('wi and we', expectSpans('うぃうぇ', ['whi', 'whe']));
|
||||
test('Double n', expectSpans('こんにちは', ['ko', 'n', 'ni', 'chi', 'ha']));
|
||||
|
||||
// TODO: fix the implementation
|
||||
// test('Double consonant', expectSpans('かっぱ', ['ka', 'ppa']));
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user