Compare commits
27 Commits
match-span
...
word-regro
| Author | SHA1 | Date | |
|---|---|---|---|
|
df8b204d3c
|
|||
|
c70838d1bf
|
|||
|
0f7854a4fc
|
|||
|
a86f857553
|
|||
|
d14e3909d4
|
|||
|
bb44bf786a
|
|||
|
ad3343a01e
|
|||
|
16d72e94ba
|
|||
|
b070a1fd31
|
|||
|
dcf5c8ebe7
|
|||
|
1f8bc8bac5
|
|||
|
ab28b5788b
|
|||
|
dd7b2917dc
|
|||
|
74798c77b5
|
|||
|
63a4caa626
|
|||
|
374be5ca6b
|
|||
|
4a6fd41f31
|
|||
|
c06fff9e5a
|
|||
|
1d9928ade1
|
|||
|
1a3b04be00
|
|||
|
c0c6f97a01
|
|||
|
a954188d5d
|
|||
|
5b86d6eb67
|
|||
|
72f31e974b
|
|||
|
e824dc0a22
|
|||
|
f5bca61839
|
|||
|
056aaaa0ce
|
74
.gitea/workflows/build-and-test.yml
Normal file
74
.gitea/workflows/build-and-test.yml
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
name: "Build and test"
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: debian-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Install sudo
|
||||||
|
run: apt-get update && apt-get -y install sudo
|
||||||
|
|
||||||
|
- name: Install nix
|
||||||
|
uses: https://github.com/cachix/install-nix-action@v31
|
||||||
|
with:
|
||||||
|
extra_nix_config: |
|
||||||
|
experimental-features = nix-command flakes
|
||||||
|
show-trace = true
|
||||||
|
max-jobs = auto
|
||||||
|
trusted-users = root
|
||||||
|
experimental-features = nix-command flakes
|
||||||
|
build-users-group =
|
||||||
|
|
||||||
|
- name: Update database inputs
|
||||||
|
run: |
|
||||||
|
nix flake update jmdict-src
|
||||||
|
nix flake update jmdict-with-examples-src
|
||||||
|
nix flake update radkfile-src
|
||||||
|
nix flake update kanjidic2-src
|
||||||
|
|
||||||
|
- name: Build database
|
||||||
|
run: nix build .#database -L
|
||||||
|
|
||||||
|
- name: Upload database as artifact
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: jadb-${{ gitea.sha }}.zip
|
||||||
|
path: result/jadb.sqlite
|
||||||
|
if-no-files-found: error
|
||||||
|
retention-days: 15
|
||||||
|
# Already compressed
|
||||||
|
compression: 0
|
||||||
|
|
||||||
|
- name: Print database statistics
|
||||||
|
run: nix develop .# --command sqlite3_analyzer result/jadb.sqlite
|
||||||
|
|
||||||
|
# TODO: Defer failure of tests until after the coverage report is generated and uploaded.
|
||||||
|
- name: Run tests
|
||||||
|
run: nix develop .# --command dart run test --concurrency=1 --coverage-path=coverage/lcov.info
|
||||||
|
|
||||||
|
- name: Generate coverage report
|
||||||
|
run: |
|
||||||
|
GENHTML_ARGS=(
|
||||||
|
--current-date="$(date)"
|
||||||
|
--dark-mode
|
||||||
|
--output-directory coverage/report
|
||||||
|
)
|
||||||
|
|
||||||
|
nix develop .# --command genhtml "${GENHTML_ARGS[@]}" coverage/lcov.info
|
||||||
|
|
||||||
|
- name: Upload coverage report
|
||||||
|
uses: https://git.pvv.ntnu.no/Projects/rsync-action@v2
|
||||||
|
with:
|
||||||
|
source: ./coverage
|
||||||
|
target: jadb/${{ gitea.ref_name }}/
|
||||||
|
username: oysteikt
|
||||||
|
ssh-key: ${{ secrets.OYSTEIKT_GITEA_WEBDOCS_SSH_KEY }}
|
||||||
|
host: microbel.pvv.ntnu.no
|
||||||
|
known-hosts: "microbel.pvv.ntnu.no ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEq0yasKP0mH6PI6ypmuzPzMnbHELo9k+YB5yW534aKudKZS65YsHJKQ9vapOtmegrn5MQbCCgrshf+/XwZcjbM="
|
||||||
|
|
||||||
|
- name: Run benchmarks
|
||||||
|
run: nix develop .# --command dart run benchmark_harness:bench --flavor jit
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
name: "Build database"
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
push:
|
|
||||||
jobs:
|
|
||||||
evals:
|
|
||||||
runs-on: debian-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v6
|
|
||||||
|
|
||||||
- name: Install sudo
|
|
||||||
run: apt-get update && apt-get -y install sudo
|
|
||||||
|
|
||||||
- name: Install nix
|
|
||||||
uses: https://github.com/cachix/install-nix-action@v31
|
|
||||||
|
|
||||||
- name: Configure nix
|
|
||||||
run: echo -e "show-trace = true\nmax-jobs = auto\ntrusted-users = root\nexperimental-features = nix-command flakes\nbuild-users-group =" > /etc/nix/nix.conf
|
|
||||||
|
|
||||||
- name: Update database inputs
|
|
||||||
run: |
|
|
||||||
nix flake update jmdict-src
|
|
||||||
nix flake update jmdict-with-examples-src
|
|
||||||
nix flake update radkfile-src
|
|
||||||
nix flake update kanjidic2-src
|
|
||||||
|
|
||||||
- name: Build database
|
|
||||||
run: nix build .#database -L
|
|
||||||
|
|
||||||
- name: Upload database as artifact
|
|
||||||
uses: actions/upload-artifact@v3
|
|
||||||
with:
|
|
||||||
name: jadb-${{ gitea.sha }}.zip
|
|
||||||
path: result/jadb.sqlite
|
|
||||||
if-no-files-found: error
|
|
||||||
retention-days: 15
|
|
||||||
# Already compressed
|
|
||||||
compression: 0
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
name: "Run tests"
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
push:
|
|
||||||
jobs:
|
|
||||||
evals:
|
|
||||||
runs-on: debian-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v6
|
|
||||||
|
|
||||||
- name: Install sudo
|
|
||||||
run: apt-get update && apt-get -y install sudo
|
|
||||||
|
|
||||||
- name: Install nix
|
|
||||||
uses: https://github.com/cachix/install-nix-action@v31
|
|
||||||
|
|
||||||
- name: Configure nix
|
|
||||||
run: echo -e "show-trace = true\nmax-jobs = auto\ntrusted-users = root\nexperimental-features = nix-command flakes\nbuild-users-group =" > /etc/nix/nix.conf
|
|
||||||
|
|
||||||
- name: Update database inputs
|
|
||||||
run: |
|
|
||||||
nix flake update jmdict-src
|
|
||||||
nix flake update jmdict-with-examples-src
|
|
||||||
nix flake update radkfile-src
|
|
||||||
nix flake update kanjidic2-src
|
|
||||||
|
|
||||||
- name: Build database
|
|
||||||
run: nix build .#database -L
|
|
||||||
|
|
||||||
- name: Run tests
|
|
||||||
run: nix develop .# --command dart test
|
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -8,6 +8,7 @@
|
|||||||
# Conventional directory for build output.
|
# Conventional directory for build output.
|
||||||
/doc/
|
/doc/
|
||||||
/build/
|
/build/
|
||||||
|
/coverage/
|
||||||
main.db
|
main.db
|
||||||
|
|
||||||
# Nix
|
# Nix
|
||||||
|
|||||||
29
README.md
29
README.md
@@ -1,7 +1,9 @@
|
|||||||
# jadb
|
|
||||||
|
|
||||||
[](https://builtwithnix.org)
|
[](https://builtwithnix.org)
|
||||||
|
|
||||||
|
[Latest coverage report](https://www.pvv.ntnu.no/~oysteikt/gitea/jadb/main/coverage/report/)
|
||||||
|
|
||||||
|
# jadb
|
||||||
|
|
||||||
An SQLite database containing open source japanese dictionary data combined from several sources
|
An SQLite database containing open source japanese dictionary data combined from several sources
|
||||||
|
|
||||||
Note that while the license for the code is MIT, the data has various licenses.
|
Note that while the license for the code is MIT, the data has various licenses.
|
||||||
@@ -16,3 +18,26 @@ Note that while the license for the code is MIT, the data has various licenses.
|
|||||||
| **Tanos JLPT levels:** | https://www.tanos.co.uk/jlpt/ |
|
| **Tanos JLPT levels:** | https://www.tanos.co.uk/jlpt/ |
|
||||||
| **Kangxi Radicals:** | https://ctext.org/kangxi-zidian |
|
| **Kangxi Radicals:** | https://ctext.org/kangxi-zidian |
|
||||||
|
|
||||||
|
## Implementation details
|
||||||
|
|
||||||
|
### Word search
|
||||||
|
|
||||||
|
The word search procedure is currently split into 3 parts:
|
||||||
|
|
||||||
|
1. **Entry ID query**:
|
||||||
|
|
||||||
|
Use a complex query with various scoring factors to try to get a list of
|
||||||
|
database ids pointing at dictionary entries, sorted by how likely we think this
|
||||||
|
word is the word that the caller is looking for. The output here is a `List<int>`.
|
||||||
|
|
||||||
|
2. **Data Query**:
|
||||||
|
|
||||||
|
Takes the entry ID list from the previous step, and performs all queries needed to retrieve
|
||||||
|
all the dictionary data for those IDs. The result is a struct with a bunch of flattened lists
|
||||||
|
with data for all the dictionary entries. These lists are sorted by the order that the ids
|
||||||
|
were provided.
|
||||||
|
|
||||||
|
3. **Regrouping**:
|
||||||
|
|
||||||
|
Takes the flattened data, and regroups the items into structs with a more "hierarchical" structure.
|
||||||
|
All data tagged with the same ID will end up in the same struct. Returns a list of these structs.
|
||||||
|
|||||||
5
benchmark/benchmark.dart
Normal file
5
benchmark/benchmark.dart
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
import './search/word_search.dart';
|
||||||
|
|
||||||
|
Future<void> main() async {
|
||||||
|
await WordSearchBenchmark.main();
|
||||||
|
}
|
||||||
45
benchmark/search/word_search.dart
Normal file
45
benchmark/search/word_search.dart
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
import 'package:benchmark_harness/benchmark_harness.dart';
|
||||||
|
import 'package:jadb/search.dart';
|
||||||
|
import 'package:sqflite_common/sqlite_api.dart';
|
||||||
|
|
||||||
|
import '../../test/search/setup_database_connection.dart';
|
||||||
|
|
||||||
|
class WordSearchBenchmark extends AsyncBenchmarkBase {
|
||||||
|
Database? connection;
|
||||||
|
|
||||||
|
static final List<String> searchTerms = [
|
||||||
|
'kana',
|
||||||
|
'kanji',
|
||||||
|
'kawaii',
|
||||||
|
'sushi',
|
||||||
|
'ramen',
|
||||||
|
];
|
||||||
|
|
||||||
|
WordSearchBenchmark() : super('WordSearchBenchmark');
|
||||||
|
|
||||||
|
static Future<void> main() async {
|
||||||
|
print('Running WordSearchBenchmark...');
|
||||||
|
await WordSearchBenchmark().report();
|
||||||
|
print('Finished WordSearchBenchmark');
|
||||||
|
}
|
||||||
|
|
||||||
|
@override
|
||||||
|
Future<void> setup() async {
|
||||||
|
connection = await setupDatabaseConnection();
|
||||||
|
}
|
||||||
|
|
||||||
|
@override
|
||||||
|
Future<void> run() async {
|
||||||
|
for (final term in searchTerms) {
|
||||||
|
await connection!.jadbSearchWord(term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@override
|
||||||
|
Future<void> teardown() async {
|
||||||
|
await connection?.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// @override
|
||||||
|
// Future<void> exercise() => run();
|
||||||
|
}
|
||||||
12
flake.lock
generated
12
flake.lock
generated
@@ -3,7 +3,7 @@
|
|||||||
"jmdict-src": {
|
"jmdict-src": {
|
||||||
"flake": false,
|
"flake": false,
|
||||||
"locked": {
|
"locked": {
|
||||||
"narHash": "sha256-1if5Z1ynrCd05ySrvD6ZA1PfKBayhBFzUOe5vplwYXM=",
|
"narHash": "sha256-lh46uougUzBrRhhwa7cOb32j5Jt9/RjBUhlVjwVzsII=",
|
||||||
"type": "file",
|
"type": "file",
|
||||||
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
|
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz"
|
||||||
},
|
},
|
||||||
@@ -15,7 +15,7 @@
|
|||||||
"jmdict-with-examples-src": {
|
"jmdict-with-examples-src": {
|
||||||
"flake": false,
|
"flake": false,
|
||||||
"locked": {
|
"locked": {
|
||||||
"narHash": "sha256-3Eb8iVSZFvuf4yH/53tDdN6Znt+tvvra6kd7GIv4LYE=",
|
"narHash": "sha256-5oS2xDyetbuSM6ax3LUjYA3N60x+D3Hg41HEXGFMqLQ=",
|
||||||
"type": "file",
|
"type": "file",
|
||||||
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
|
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
|
||||||
},
|
},
|
||||||
@@ -27,7 +27,7 @@
|
|||||||
"kanjidic2-src": {
|
"kanjidic2-src": {
|
||||||
"flake": false,
|
"flake": false,
|
||||||
"locked": {
|
"locked": {
|
||||||
"narHash": "sha256-mg2cP3rX1wm+dTAQCNHthVcKUH5PZRhGbHv1AP2EwJQ=",
|
"narHash": "sha256-orSeQqSxhn9TtX3anYtbiMEm7nFkuomGnIKoVIUR2CM=",
|
||||||
"type": "file",
|
"type": "file",
|
||||||
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
|
"url": "https://www.edrdg.org/kanjidic/kanjidic2.xml.gz"
|
||||||
},
|
},
|
||||||
@@ -38,11 +38,11 @@
|
|||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1771369470,
|
"lastModified": 1771848320,
|
||||||
"narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=",
|
"narHash": "sha256-0MAd+0mun3K/Ns8JATeHT1sX28faLII5hVLq0L3BdZU=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "0182a361324364ae3f436a63005877674cf45efb",
|
"rev": "2fc6539b481e1d2569f25f8799236694180c0993",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|||||||
@@ -80,8 +80,9 @@
|
|||||||
buildInputs = with pkgs; [
|
buildInputs = with pkgs; [
|
||||||
dart
|
dart
|
||||||
gnumake
|
gnumake
|
||||||
sqlite-interactive
|
lcov
|
||||||
sqlite-analyzer
|
sqlite-analyzer
|
||||||
|
sqlite-interactive
|
||||||
sqlite-web
|
sqlite-web
|
||||||
# sqlint
|
# sqlint
|
||||||
sqlfluff
|
sqlfluff
|
||||||
@@ -89,6 +90,7 @@
|
|||||||
env = {
|
env = {
|
||||||
LIBSQLITE_PATH = "${pkgs.sqlite.out}/lib/libsqlite3.so";
|
LIBSQLITE_PATH = "${pkgs.sqlite.out}/lib/libsqlite3.so";
|
||||||
JADB_PATH = "result/jadb.sqlite";
|
JADB_PATH = "result/jadb.sqlite";
|
||||||
|
LD_LIBRARY_PATH = lib.makeLibraryPath [ pkgs.sqlite ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -10,14 +10,15 @@ List<int?> getPriorityValues(XmlElement e, String prefix) {
|
|||||||
final txt = pri.innerText;
|
final txt = pri.innerText;
|
||||||
if (txt.startsWith('news')) {
|
if (txt.startsWith('news')) {
|
||||||
news = int.parse(txt.substring(4));
|
news = int.parse(txt.substring(4));
|
||||||
} else if (txt.startsWith('ichi'))
|
} else if (txt.startsWith('ichi')) {
|
||||||
ichi = int.parse(txt.substring(4));
|
ichi = int.parse(txt.substring(4));
|
||||||
else if (txt.startsWith('spec'))
|
} else if (txt.startsWith('spec')) {
|
||||||
spec = int.parse(txt.substring(4));
|
spec = int.parse(txt.substring(4));
|
||||||
else if (txt.startsWith('gai'))
|
} else if (txt.startsWith('gai')) {
|
||||||
gai = int.parse(txt.substring(3));
|
gai = int.parse(txt.substring(3));
|
||||||
else if (txt.startsWith('nf'))
|
} else if (txt.startsWith('nf')) {
|
||||||
nf = int.parse(txt.substring(2));
|
nf = int.parse(txt.substring(2));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return [news, ichi, spec, gai, nf];
|
return [news, ichi, spec, gai, nf];
|
||||||
}
|
}
|
||||||
@@ -79,16 +80,16 @@ List<Entry> parseJMDictData(XmlElement root) {
|
|||||||
final List<ReadingElement> readingEls = [];
|
final List<ReadingElement> readingEls = [];
|
||||||
final List<Sense> senses = [];
|
final List<Sense> senses = [];
|
||||||
|
|
||||||
for (final (kanjiNum, k_ele) in entry.findElements('k_ele').indexed) {
|
for (final (kanjiNum, kEle) in entry.findElements('k_ele').indexed) {
|
||||||
final kePri = getPriorityValues(k_ele, 'ke');
|
final kePri = getPriorityValues(kEle, 'ke');
|
||||||
kanjiEls.add(
|
kanjiEls.add(
|
||||||
KanjiElement(
|
KanjiElement(
|
||||||
orderNum: kanjiNum + 1,
|
orderNum: kanjiNum + 1,
|
||||||
info: k_ele
|
info: kEle
|
||||||
.findElements('ke_inf')
|
.findElements('ke_inf')
|
||||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||||
.toList(),
|
.toList(),
|
||||||
reading: k_ele.findElements('keb').first.innerText,
|
reading: kEle.findElements('keb').first.innerText,
|
||||||
news: kePri[0],
|
news: kePri[0],
|
||||||
ichi: kePri[1],
|
ichi: kePri[1],
|
||||||
spec: kePri[2],
|
spec: kePri[2],
|
||||||
@@ -98,24 +99,24 @@ List<Entry> parseJMDictData(XmlElement root) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final (orderNum, r_ele) in entry.findElements('r_ele').indexed) {
|
for (final (orderNum, rEle) in entry.findElements('r_ele').indexed) {
|
||||||
final rePri = getPriorityValues(r_ele, 're');
|
final rePri = getPriorityValues(rEle, 're');
|
||||||
final readingDoesNotMatchKanji = r_ele
|
final readingDoesNotMatchKanji = rEle
|
||||||
.findElements('re_nokanji')
|
.findElements('re_nokanji')
|
||||||
.isNotEmpty;
|
.isNotEmpty;
|
||||||
readingEls.add(
|
readingEls.add(
|
||||||
ReadingElement(
|
ReadingElement(
|
||||||
orderNum: orderNum + 1,
|
orderNum: orderNum + 1,
|
||||||
readingDoesNotMatchKanji: readingDoesNotMatchKanji,
|
readingDoesNotMatchKanji: readingDoesNotMatchKanji,
|
||||||
info: r_ele
|
info: rEle
|
||||||
.findElements('re_inf')
|
.findElements('re_inf')
|
||||||
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
.map((e) => e.innerText.substring(1, e.innerText.length - 1))
|
||||||
.toList(),
|
.toList(),
|
||||||
restrictions: r_ele
|
restrictions: rEle
|
||||||
.findElements('re_restr')
|
.findElements('re_restr')
|
||||||
.map((e) => e.innerText)
|
.map((e) => e.innerText)
|
||||||
.toList(),
|
.toList(),
|
||||||
reading: r_ele.findElements('reb').first.innerText,
|
reading: rEle.findElements('reb').first.innerText,
|
||||||
news: rePri[0],
|
news: rePri[0],
|
||||||
ichi: rePri[1],
|
ichi: rePri[1],
|
||||||
spec: rePri[2],
|
spec: rePri[2],
|
||||||
|
|||||||
@@ -19,19 +19,18 @@ Future<Database> openLocalDb({
|
|||||||
throw Exception('JADB_PATH does not exist: $jadbPath');
|
throw Exception('JADB_PATH does not exist: $jadbPath');
|
||||||
}
|
}
|
||||||
|
|
||||||
final db =
|
final db = await createDatabaseFactoryFfi().openDatabase(
|
||||||
await createDatabaseFactoryFfi().openDatabase(
|
jadbPath,
|
||||||
jadbPath,
|
options: OpenDatabaseOptions(
|
||||||
options: OpenDatabaseOptions(
|
onConfigure: (db) async {
|
||||||
onConfigure: (db) async {
|
if (walMode) {
|
||||||
if (walMode) {
|
await db.execute('PRAGMA journal_mode = WAL');
|
||||||
await db.execute('PRAGMA journal_mode = WAL');
|
}
|
||||||
}
|
await db.execute('PRAGMA foreign_keys = ON');
|
||||||
await db.execute('PRAGMA foreign_keys = ON');
|
},
|
||||||
},
|
readOnly: !readWrite,
|
||||||
readOnly: !readWrite,
|
),
|
||||||
),
|
);
|
||||||
);
|
|
||||||
|
|
||||||
if (verifyTablesExist) {
|
if (verifyTablesExist) {
|
||||||
await db.jadbVerifyTables();
|
await db.jadbVerifyTables();
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
const Map<(String?, String), int?> TANOS_JLPT_OVERRIDES = {
|
const Map<(String?, String), int?> tanosJLPTOverrides = {
|
||||||
// N5:
|
// N5:
|
||||||
(null, 'あなた'): 1223615,
|
(null, 'あなた'): 1223615,
|
||||||
(null, 'あの'): 1000430,
|
(null, 'あの'): 1000430,
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ Future<int?> findEntry(
|
|||||||
if ((entryIds.isEmpty || entryIds.length > 1) && useOverrides) {
|
if ((entryIds.isEmpty || entryIds.length > 1) && useOverrides) {
|
||||||
print('No entry found, trying to fetch from overrides');
|
print('No entry found, trying to fetch from overrides');
|
||||||
final overrideEntries = word.readings
|
final overrideEntries = word.readings
|
||||||
.map((reading) => TANOS_JLPT_OVERRIDES[(word.kanji, reading)])
|
.map((reading) => tanosJLPTOverrides[(word.kanji, reading)])
|
||||||
.whereType<int>()
|
.whereType<int>()
|
||||||
.toSet();
|
.toSet();
|
||||||
|
|
||||||
@@ -86,7 +86,7 @@ Future<int?> findEntry(
|
|||||||
);
|
);
|
||||||
} else if (overrideEntries.isEmpty &&
|
} else if (overrideEntries.isEmpty &&
|
||||||
!word.readings.any(
|
!word.readings.any(
|
||||||
(reading) => TANOS_JLPT_OVERRIDES.containsKey((word.kanji, reading)),
|
(reading) => tanosJLPTOverrides.containsKey((word.kanji, reading)),
|
||||||
)) {
|
)) {
|
||||||
throw Exception(
|
throw Exception(
|
||||||
'No override entry found for ${word.toString()}: $entryIds',
|
'No override entry found for ${word.toString()}: $entryIds',
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
/// Jouyou kanji sorted primarily by grades and secondarily by strokes.
|
/// Jouyou kanji sorted primarily by grades and secondarily by strokes.
|
||||||
const Map<int, Map<int, List<String>>>
|
const Map<int, Map<int, List<String>>> jouyouKanjiByGradeAndStrokeCount = {
|
||||||
JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT = {
|
|
||||||
1: {
|
1: {
|
||||||
1: ['一'],
|
1: ['一'],
|
||||||
2: ['力', '八', '入', '二', '人', '十', '七', '九'],
|
2: ['力', '八', '入', '二', '人', '十', '七', '九'],
|
||||||
@@ -1861,8 +1860,8 @@ JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
final Map<int, List<String>> JOUYOU_KANJI_BY_GRADES =
|
final Map<int, List<String>> jouyouKanjiByGrades =
|
||||||
JOUYOU_KANJI_BY_GRADE_AND_STROKE_COUNT.entries
|
jouyouKanjiByGradeAndStrokeCount.entries
|
||||||
.expand((entry) => entry.value.entries)
|
.expand((entry) => entry.value.entries)
|
||||||
.map((entry) => MapEntry(entry.key, entry.value))
|
.map((entry) => MapEntry(entry.key, entry.value))
|
||||||
.fold<Map<int, List<String>>>(
|
.fold<Map<int, List<String>>>(
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
const Map<int, List<String>> RADICALS = {
|
const Map<int, List<String>> radicals = {
|
||||||
1: ['一', '|', '丶', 'ノ', '乙', '亅'],
|
1: ['一', '|', '丶', 'ノ', '乙', '亅'],
|
||||||
2: [
|
2: [
|
||||||
'二',
|
'二',
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import 'package:jadb/models/word_search/word_search_ruby.dart';
|
|||||||
import 'package:jadb/models/word_search/word_search_sense.dart';
|
import 'package:jadb/models/word_search/word_search_sense.dart';
|
||||||
import 'package:jadb/models/word_search/word_search_sources.dart';
|
import 'package:jadb/models/word_search/word_search_sources.dart';
|
||||||
import 'package:jadb/search/word_search/word_search.dart';
|
import 'package:jadb/search/word_search/word_search.dart';
|
||||||
|
import 'package:jadb/util/romaji_transliteration.dart';
|
||||||
|
|
||||||
/// A class representing a single dictionary entry from a word search.
|
/// A class representing a single dictionary entry from a word search.
|
||||||
class WordSearchResult {
|
class WordSearchResult {
|
||||||
@@ -44,6 +45,35 @@ class WordSearchResult {
|
|||||||
/// the original searchword.
|
/// the original searchword.
|
||||||
List<WordSearchMatchSpan>? matchSpans;
|
List<WordSearchMatchSpan>? matchSpans;
|
||||||
|
|
||||||
|
/// All contents of [japanese], transliterated to romaji
|
||||||
|
List<String> get romaji => japanese
|
||||||
|
.map((word) => transliterateKanaToLatin(word.furigana ?? word.base))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
/// All contents of [japanese], where the furigana has either been transliterated to romaji, or
|
||||||
|
/// contains the romaji transliteration of [WordSearchRuby.base].
|
||||||
|
List<WordSearchRuby> get romajiRubys => japanese
|
||||||
|
.map(
|
||||||
|
(word) => WordSearchRuby(
|
||||||
|
base: word.base,
|
||||||
|
furigana: word.furigana != null
|
||||||
|
? transliterateKanaToLatin(word.furigana!)
|
||||||
|
: transliterateKanaToLatin(word.base),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
/// The same list of spans as [matchSpans], but the positions have been adjusted for romaji conversion
|
||||||
|
///
|
||||||
|
/// This is mostly useful in conjunction with [romajiRubys].
|
||||||
|
List<WordSearchMatchSpan>? get romajiMatchSpans {
|
||||||
|
if (matchSpans == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw UnimplementedError('Not yet implemented');
|
||||||
|
}
|
||||||
|
|
||||||
WordSearchResult({
|
WordSearchResult({
|
||||||
required this.score,
|
required this.score,
|
||||||
required this.entryId,
|
required this.entryId,
|
||||||
@@ -107,7 +137,7 @@ class WordSearchResult {
|
|||||||
/// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
|
/// Infers which part(s) of this word search result matched the search keyword, and populates [matchSpans] accordingly.
|
||||||
void inferMatchSpans(
|
void inferMatchSpans(
|
||||||
String searchword, {
|
String searchword, {
|
||||||
SearchMode searchMode = SearchMode.Auto,
|
SearchMode searchMode = SearchMode.auto,
|
||||||
}) {
|
}) {
|
||||||
// TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
|
// TODO: handle wildcards like '?' and '*' when that becomes supported in the search.
|
||||||
// TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.
|
// TODO: If the searchMode is provided, we can use that to narrow down which part of the word search results to look at.
|
||||||
@@ -163,7 +193,7 @@ class WordSearchResult {
|
|||||||
this.matchSpans = matchSpans;
|
this.matchSpans = matchSpans;
|
||||||
}
|
}
|
||||||
|
|
||||||
String _formatJapaneseWord(WordSearchRuby word) =>
|
static String _formatJapaneseWord(WordSearchRuby word) =>
|
||||||
word.furigana == null ? word.base : '${word.base} (${word.furigana})';
|
word.furigana == null ? word.base : '${word.base} (${word.furigana})';
|
||||||
|
|
||||||
@override
|
@override
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ extension JaDBConnection on DatabaseExecutor {
|
|||||||
/// Search for a word in the database.
|
/// Search for a word in the database.
|
||||||
Future<List<WordSearchResult>?> jadbSearchWord(
|
Future<List<WordSearchResult>?> jadbSearchWord(
|
||||||
String word, {
|
String word, {
|
||||||
SearchMode searchMode = SearchMode.Auto,
|
SearchMode searchMode = SearchMode.auto,
|
||||||
int page = 0,
|
int page = 0,
|
||||||
int? pageSize,
|
int? pageSize,
|
||||||
}) => searchWordWithDbConnection(
|
}) => searchWordWithDbConnection(
|
||||||
@@ -54,7 +54,7 @@ extension JaDBConnection on DatabaseExecutor {
|
|||||||
/// Search for a word in the database, and return the count of results.
|
/// Search for a word in the database, and return the count of results.
|
||||||
Future<int?> jadbSearchWordCount(
|
Future<int?> jadbSearchWordCount(
|
||||||
String word, {
|
String word, {
|
||||||
SearchMode searchMode = SearchMode.Auto,
|
SearchMode searchMode = SearchMode.auto,
|
||||||
}) => searchWordCountWithDbConnection(this, word, searchMode: searchMode);
|
}) => searchWordCountWithDbConnection(this, word, searchMode: searchMode);
|
||||||
|
|
||||||
/// Given a list of radicals, search which kanji contains all
|
/// Given a list of radicals, search which kanji contains all
|
||||||
|
|||||||
@@ -18,7 +18,15 @@ Future<List<String>> filterKanjiWithDbConnection(
|
|||||||
.then((value) => value.map((e) => e['literal'] as String).toSet());
|
.then((value) => value.map((e) => e['literal'] as String).toSet());
|
||||||
|
|
||||||
if (deduplicate) {
|
if (deduplicate) {
|
||||||
return filteredKanji.toList();
|
final List<String> result = [];
|
||||||
|
final Set<String> seen = {};
|
||||||
|
for (final k in kanji) {
|
||||||
|
if (filteredKanji.contains(k) && !seen.contains(k)) {
|
||||||
|
result.add(k);
|
||||||
|
seen.add(k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
} else {
|
} else {
|
||||||
return kanji.where((k) => filteredKanji.contains(k)).toList();
|
return kanji.where((k) => filteredKanji.contains(k)).toList();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,140 +5,186 @@ import 'package:jadb/table_names/kanjidic.dart';
|
|||||||
import 'package:jadb/table_names/radkfile.dart';
|
import 'package:jadb/table_names/radkfile.dart';
|
||||||
import 'package:sqflite_common/sqflite.dart';
|
import 'package:sqflite_common/sqflite.dart';
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _charactersQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.character,
|
||||||
|
where: 'literal = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _codepointsQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.codepoint,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _kunyomisQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.kunyomi,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
orderBy: 'orderNum',
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _onyomisQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.onyomi,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
orderBy: 'orderNum',
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _meaningsQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.meaning,
|
||||||
|
where: 'kanji = ? AND language = ?',
|
||||||
|
whereArgs: [kanji, 'eng'],
|
||||||
|
orderBy: 'orderNum',
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _nanorisQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.nanori,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _dictionaryReferencesQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.dictionaryReference,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _queryCodesQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.queryCode,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _radicalsQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.rawQuery(
|
||||||
|
'''
|
||||||
|
SELECT DISTINCT
|
||||||
|
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
|
||||||
|
"names"
|
||||||
|
FROM "${KANJIDICTableNames.radical}"
|
||||||
|
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
|
||||||
|
LEFT JOIN (
|
||||||
|
SELECT "radicalId", group_concat("name") AS "names"
|
||||||
|
FROM "${KANJIDICTableNames.radicalName}"
|
||||||
|
GROUP BY "radicalId"
|
||||||
|
) USING ("radicalId")
|
||||||
|
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
|
||||||
|
''',
|
||||||
|
[kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _partsQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
RADKFILETableNames.radkfile,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _readingsQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.reading,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
Future<List<Map<String, Object?>>> _strokeMiscountsQuery(
|
||||||
|
DatabaseExecutor connection,
|
||||||
|
String kanji,
|
||||||
|
) => connection.query(
|
||||||
|
KANJIDICTableNames.strokeMiscount,
|
||||||
|
where: 'kanji = ?',
|
||||||
|
whereArgs: [kanji],
|
||||||
|
);
|
||||||
|
|
||||||
|
// Future<List<Map<String, Object?>>> _variantsQuery(
|
||||||
|
// DatabaseExecutor connection,
|
||||||
|
// String kanji,
|
||||||
|
// ) => connection.query(
|
||||||
|
// KANJIDICTableNames.variant,
|
||||||
|
// where: 'kanji = ?',
|
||||||
|
// whereArgs: [kanji],
|
||||||
|
// );
|
||||||
|
|
||||||
/// Searches for a kanji character and returns its details, or null if the kanji is not found in the database.
|
/// Searches for a kanji character and returns its details, or null if the kanji is not found in the database.
|
||||||
Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
Future<KanjiSearchResult?> searchKanjiWithDbConnection(
|
||||||
DatabaseExecutor connection,
|
DatabaseExecutor connection,
|
||||||
String kanji,
|
String kanji,
|
||||||
) async {
|
) async {
|
||||||
late final List<Map<String, Object?>> characters;
|
late final List<Map<String, Object?>> characters;
|
||||||
final charactersQuery = connection.query(
|
|
||||||
KANJIDICTableNames.character,
|
|
||||||
where: 'literal = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> codepoints;
|
late final List<Map<String, Object?>> codepoints;
|
||||||
final codepointsQuery = connection.query(
|
|
||||||
KANJIDICTableNames.codepoint,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> kunyomis;
|
late final List<Map<String, Object?>> kunyomis;
|
||||||
final kunyomisQuery = connection.query(
|
|
||||||
KANJIDICTableNames.kunyomi,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
orderBy: 'orderNum',
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> onyomis;
|
late final List<Map<String, Object?>> onyomis;
|
||||||
final onyomisQuery = connection.query(
|
|
||||||
KANJIDICTableNames.onyomi,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
orderBy: 'orderNum',
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> meanings;
|
late final List<Map<String, Object?>> meanings;
|
||||||
final meaningsQuery = connection.query(
|
|
||||||
KANJIDICTableNames.meaning,
|
|
||||||
where: 'kanji = ? AND language = ?',
|
|
||||||
whereArgs: [kanji, 'eng'],
|
|
||||||
orderBy: 'orderNum',
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> nanoris;
|
late final List<Map<String, Object?>> nanoris;
|
||||||
final nanorisQuery = connection.query(
|
|
||||||
KANJIDICTableNames.nanori,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> dictionaryReferences;
|
late final List<Map<String, Object?>> dictionaryReferences;
|
||||||
final dictionaryReferencesQuery = connection.query(
|
|
||||||
KANJIDICTableNames.dictionaryReference,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> queryCodes;
|
late final List<Map<String, Object?>> queryCodes;
|
||||||
final queryCodesQuery = connection.query(
|
|
||||||
KANJIDICTableNames.queryCode,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> radicals;
|
late final List<Map<String, Object?>> radicals;
|
||||||
final radicalsQuery = connection.rawQuery(
|
|
||||||
'''
|
|
||||||
SELECT DISTINCT
|
|
||||||
"XREF__KANJIDIC_Radical__RADKFILE"."radicalSymbol" AS "symbol",
|
|
||||||
"names"
|
|
||||||
FROM "${KANJIDICTableNames.radical}"
|
|
||||||
JOIN "XREF__KANJIDIC_Radical__RADKFILE" USING ("radicalId")
|
|
||||||
LEFT JOIN (
|
|
||||||
SELECT "radicalId", group_concat("name") AS "names"
|
|
||||||
FROM "${KANJIDICTableNames.radicalName}"
|
|
||||||
GROUP BY "radicalId"
|
|
||||||
) USING ("radicalId")
|
|
||||||
WHERE "${KANJIDICTableNames.radical}"."kanji" = ?
|
|
||||||
''',
|
|
||||||
[kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> parts;
|
late final List<Map<String, Object?>> parts;
|
||||||
final partsQuery = connection.query(
|
|
||||||
RADKFILETableNames.radkfile,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> readings;
|
late final List<Map<String, Object?>> readings;
|
||||||
final readingsQuery = connection.query(
|
|
||||||
KANJIDICTableNames.reading,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> strokeMiscounts;
|
late final List<Map<String, Object?>> strokeMiscounts;
|
||||||
final strokeMiscountsQuery = connection.query(
|
|
||||||
KANJIDICTableNames.strokeMiscount,
|
|
||||||
where: 'kanji = ?',
|
|
||||||
whereArgs: [kanji],
|
|
||||||
);
|
|
||||||
|
|
||||||
// TODO: add variant data to result
|
// TODO: add variant data to result
|
||||||
// late final List<Map<String, Object?>> variants;
|
// late final List<Map<String, Object?>> variants;
|
||||||
// final variants_query = connection.query(
|
|
||||||
// KANJIDICTableNames.variant,
|
|
||||||
// where: "kanji = ?",
|
|
||||||
// whereArgs: [kanji],
|
|
||||||
// );
|
|
||||||
|
|
||||||
// TODO: Search for kunyomi and onyomi usage of the characters
|
// TODO: Search for kunyomi and onyomi usage of the characters
|
||||||
// from JMDict. We'll need to fuzzy aquery JMDict_KanjiElement for mathces,
|
// from JMDict. We'll need to fuzzy query JMDict_KanjiElement for matches,
|
||||||
// filter JMdict_ReadingElement for kunyomi/onyomi, and then sort the main entry
|
// filter JMdict_ReadingElement for kunyomi/onyomi, and then sort the main entry
|
||||||
// by JLPT, news frequency, etc.
|
// by JLPT, news frequency, etc.
|
||||||
|
|
||||||
await charactersQuery.then((value) => characters = value);
|
await _charactersQuery(connection, kanji).then((value) => characters = value);
|
||||||
|
|
||||||
if (characters.isEmpty) {
|
if (characters.isEmpty) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
await Future.wait({
|
await Future.wait({
|
||||||
codepointsQuery.then((value) => codepoints = value),
|
_codepointsQuery(connection, kanji).then((value) => codepoints = value),
|
||||||
kunyomisQuery.then((value) => kunyomis = value),
|
_kunyomisQuery(connection, kanji).then((value) => kunyomis = value),
|
||||||
onyomisQuery.then((value) => onyomis = value),
|
_onyomisQuery(connection, kanji).then((value) => onyomis = value),
|
||||||
meaningsQuery.then((value) => meanings = value),
|
_meaningsQuery(connection, kanji).then((value) => meanings = value),
|
||||||
nanorisQuery.then((value) => nanoris = value),
|
_nanorisQuery(connection, kanji).then((value) => nanoris = value),
|
||||||
dictionaryReferencesQuery.then((value) => dictionaryReferences = value),
|
_dictionaryReferencesQuery(
|
||||||
queryCodesQuery.then((value) => queryCodes = value),
|
connection,
|
||||||
radicalsQuery.then((value) => radicals = value),
|
kanji,
|
||||||
partsQuery.then((value) => parts = value),
|
).then((value) => dictionaryReferences = value),
|
||||||
readingsQuery.then((value) => readings = value),
|
_queryCodesQuery(connection, kanji).then((value) => queryCodes = value),
|
||||||
strokeMiscountsQuery.then((value) => strokeMiscounts = value),
|
_radicalsQuery(connection, kanji).then((value) => radicals = value),
|
||||||
|
_partsQuery(connection, kanji).then((value) => parts = value),
|
||||||
|
_readingsQuery(connection, kanji).then((value) => readings = value),
|
||||||
|
_strokeMiscountsQuery(
|
||||||
|
connection,
|
||||||
|
kanji,
|
||||||
|
).then((value) => strokeMiscounts = value),
|
||||||
// variants_query.then((value) => variants = value),
|
// variants_query.then((value) => variants = value),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -53,274 +53,363 @@ class LinearWordQueryData {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.sense] whose `entryId` is one of
/// [entryIds].
Future<List<Map<String, Object?>>> _sensesQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.sense,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.readingElement] whose `entryId` is
/// one of [entryIds], ordered by `orderNum`.
Future<List<Map<String, Object?>>> _readingelementsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingElement,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.kanjiElement] whose `entryId` is one
/// of [entryIds], ordered by `orderNum`.
Future<List<Map<String, Object?>>> _kanjielementsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.kanjiElement,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
    orderBy: 'orderNum',
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [TanosJLPTTableNames.jlptTag] whose `entryId` is one of
/// [entryIds].
Future<List<Map<String, Object?>>> _jlpttagsQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    TanosJLPTTableNames.jlptTag,
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of `JMdict_EntryCommon` whose `entryId` is one of
/// [entryIds].
Future<List<Map<String, Object?>>> _commonentriesQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(entryIds.length, '?').join(',');
  return connection.query(
    'JMdict_EntryCommon',
    where: 'entryId IN ($placeholders)',
    whereArgs: entryIds,
  );
}
|
||||||
|
|
||||||
|
// Sense queries
|
||||||
|
|
||||||
|
/// Fetches the antonym cross-references of the senses in [senseIds].
///
/// Joins [JMdictTableNames.senseAntonyms] with `JMdict_BaseAndFurigana` on the
/// referenced entry id, keeping only the `JMdict_BaseAndFurigana` rows flagged
/// `isFirst`. Each row has `senseId`, `ambiguous`, `xrefEntryId`, `base` and
/// `furigana`; rows are ordered by `senseId`, then `xrefEntryId`.
Future<List<Map<String, Object?>>> _senseantonymsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final antonymTable = JMdictTableNames.senseAntonyms;
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');

  final sql =
      """
    SELECT
      "$antonymTable".senseId,
      "$antonymTable".ambiguous,
      "$antonymTable".xrefEntryId,
      "JMdict_BaseAndFurigana"."base",
      "JMdict_BaseAndFurigana"."furigana"
    FROM "$antonymTable"
    JOIN "JMdict_BaseAndFurigana"
      ON "$antonymTable"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
    WHERE
      "senseId" IN ($placeholders)
      AND "JMdict_BaseAndFurigana"."isFirst"
    ORDER BY
      "$antonymTable"."senseId",
      "$antonymTable"."xrefEntryId"
  """;

  // A fresh copy of the ids is handed over as the bound arguments.
  return connection.rawQuery(sql, [...senseIds]);
}
|
||||||
|
|
||||||
|
/// Fetches the "see also" cross-references of the senses in [senseIds].
///
/// Joins [JMdictTableNames.senseSeeAlso] with `JMdict_BaseAndFurigana` on the
/// referenced entry id, keeping only the `JMdict_BaseAndFurigana` rows flagged
/// `isFirst`. Each row has `senseId`, `ambiguous`, `xrefEntryId`, `base` and
/// `furigana`; rows are ordered by `senseId`, then `xrefEntryId`.
Future<List<Map<String, Object?>>> _senseseealsosQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  final seeAlsoTable = JMdictTableNames.senseSeeAlso;
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');

  final sql =
      """
    SELECT
      "$seeAlsoTable"."senseId",
      "$seeAlsoTable"."ambiguous",
      "$seeAlsoTable"."xrefEntryId",
      "JMdict_BaseAndFurigana"."base",
      "JMdict_BaseAndFurigana"."furigana"
    FROM "$seeAlsoTable"
    JOIN "JMdict_BaseAndFurigana"
      ON "$seeAlsoTable"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
    WHERE
      "senseId" IN ($placeholders)
      AND "JMdict_BaseAndFurigana"."isFirst"
    ORDER BY
      "$seeAlsoTable"."senseId",
      "$seeAlsoTable"."xrefEntryId"
  """;

  // A fresh copy of the ids is handed over as the bound arguments.
  return connection.rawQuery(sql, [...senseIds]);
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseDialect] whose `senseId` is one
/// of [senseIds].
Future<List<Map<String, Object?>>> _sensedialectsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseDialect,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseField] whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _sensefieldsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseField,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseGlossary] whose `senseId` is one
/// of [senseIds].
Future<List<Map<String, Object?>>> _senseglossariesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseGlossary,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseInfo] whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _senseinfosQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseInfo,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseLanguageSource] whose `senseId`
/// is one of [senseIds].
Future<List<Map<String, Object?>>> _senselanguagesourcesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseLanguageSource,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseMisc] whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _sensemiscsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseMisc,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.sensePOS] whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _sensepossQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.sensePOS,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseRestrictedToKanji] whose
/// `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _senserestrictedtokanjisQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseRestrictedToKanji,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.senseRestrictedToReading] whose
/// `senseId` is one of [senseIds].
Future<List<Map<String, Object?>>> _senserestrictedtoreadingsQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.senseRestrictedToReading,
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of `JMdict_ExampleSentence` whose `senseId` is one of
/// [senseIds].
Future<List<Map<String, Object?>>> _examplesentencesQuery(
  DatabaseExecutor connection,
  List<int> senseIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(senseIds.length, '?').join(',');
  return connection.query(
    'JMdict_ExampleSentence',
    where: 'senseId IN ($placeholders)',
    whereArgs: senseIds,
  );
}
|
||||||
|
|
||||||
|
// Reading/kanji elements queries
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.readingInfo] whose `elementId` is one
/// of [readingIds].
Future<List<Map<String, Object?>>> _readingelementinfosQuery(
  DatabaseExecutor connection,
  List<int> readingIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(readingIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingInfo,
    where: '(elementId) IN ($placeholders)',
    whereArgs: readingIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.readingRestriction] whose `elementId`
/// is one of [readingIds].
Future<List<Map<String, Object?>>> _readingelementrestrictionsQuery(
  DatabaseExecutor connection,
  List<int> readingIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(readingIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.readingRestriction,
    where: '(elementId) IN ($placeholders)',
    whereArgs: readingIds,
  );
}
|
||||||
|
|
||||||
|
/// Fetches the rows of [JMdictTableNames.kanjiInfo] whose `elementId` is one
/// of [kanjiIds].
Future<List<Map<String, Object?>>> _kanjielementinfosQuery(
  DatabaseExecutor connection,
  List<int> kanjiIds,
) {
  // One `?` per id, so the ids are bound as arguments rather than inlined.
  final placeholders = List.filled(kanjiIds.length, '?').join(',');
  return connection.query(
    JMdictTableNames.kanjiInfo,
    where: '(elementId) IN ($placeholders)',
    whereArgs: kanjiIds,
  );
}
|
||||||
|
|
||||||
|
// Xref queries
|
||||||
|
|
||||||
|
/// Loads the word data for the entries in [entryIds].
///
/// Delegates to [fetchLinearWordQueryData] with `fetchXrefData: false`, so
/// the nested fetch does not go on to load cross-reference data of its own.
Future<LinearWordQueryData?> _senseantonymdataQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  return fetchLinearWordQueryData(
    connection,
    entryIds,
    fetchXrefData: false,
  );
}
|
||||||
|
|
||||||
|
/// Loads the word data for the entries in [entryIds].
///
/// Delegates to [fetchLinearWordQueryData] with `fetchXrefData: false`, so
/// the nested fetch does not go on to load cross-reference data of its own.
Future<LinearWordQueryData?> _senseseealsodataQuery(
  DatabaseExecutor connection,
  List<int> entryIds,
) {
  return fetchLinearWordQueryData(
    connection,
    entryIds,
    fetchXrefData: false,
  );
}
|
||||||
|
|
||||||
|
// Full query
|
||||||
|
|
||||||
Future<LinearWordQueryData> fetchLinearWordQueryData(
|
Future<LinearWordQueryData> fetchLinearWordQueryData(
|
||||||
DatabaseExecutor connection,
|
DatabaseExecutor connection,
|
||||||
List<int> entryIds, {
|
List<int> entryIds, {
|
||||||
bool fetchXrefData = true,
|
bool fetchXrefData = true,
|
||||||
}) async {
|
}) async {
|
||||||
late final List<Map<String, Object?>> senses;
|
late final List<Map<String, Object?>> senses;
|
||||||
final Future<List<Map<String, Object?>>> sensesQuery = connection.query(
|
|
||||||
JMdictTableNames.sense,
|
|
||||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
|
||||||
whereArgs: entryIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> readingElements;
|
late final List<Map<String, Object?>> readingElements;
|
||||||
final Future<List<Map<String, Object?>>> readingelementsQuery = connection
|
|
||||||
.query(
|
|
||||||
JMdictTableNames.readingElement,
|
|
||||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
|
||||||
whereArgs: entryIds,
|
|
||||||
orderBy: 'orderNum',
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> kanjiElements;
|
late final List<Map<String, Object?>> kanjiElements;
|
||||||
final Future<List<Map<String, Object?>>> kanjielementsQuery = connection
|
|
||||||
.query(
|
|
||||||
JMdictTableNames.kanjiElement,
|
|
||||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
|
||||||
whereArgs: entryIds,
|
|
||||||
orderBy: 'orderNum',
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> jlptTags;
|
late final List<Map<String, Object?>> jlptTags;
|
||||||
final Future<List<Map<String, Object?>>> jlpttagsQuery = connection.query(
|
|
||||||
TanosJLPTTableNames.jlptTag,
|
|
||||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
|
||||||
whereArgs: entryIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> commonEntries;
|
late final List<Map<String, Object?>> commonEntries;
|
||||||
final Future<List<Map<String, Object?>>> commonentriesQuery = connection
|
|
||||||
.query(
|
|
||||||
'JMdict_EntryCommon',
|
|
||||||
where: 'entryId IN (${List.filled(entryIds.length, '?').join(',')})',
|
|
||||||
whereArgs: entryIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
await Future.wait([
|
await Future.wait([
|
||||||
sensesQuery.then((value) => senses = value),
|
_sensesQuery(connection, entryIds).then((value) => senses = value),
|
||||||
readingelementsQuery.then((value) => readingElements = value),
|
_readingelementsQuery(
|
||||||
kanjielementsQuery.then((value) => kanjiElements = value),
|
connection,
|
||||||
jlpttagsQuery.then((value) => jlptTags = value),
|
entryIds,
|
||||||
commonentriesQuery.then((value) => commonEntries = value),
|
).then((value) => readingElements = value),
|
||||||
|
_kanjielementsQuery(
|
||||||
|
connection,
|
||||||
|
entryIds,
|
||||||
|
).then((value) => kanjiElements = value),
|
||||||
|
_jlpttagsQuery(connection, entryIds).then((value) => jlptTags = value),
|
||||||
|
_commonentriesQuery(
|
||||||
|
connection,
|
||||||
|
entryIds,
|
||||||
|
).then((value) => commonEntries = value),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// Sense queries
|
|
||||||
|
|
||||||
final senseIds = senses.map((sense) => sense['senseId'] as int).toList();
|
final senseIds = senses.map((sense) => sense['senseId'] as int).toList();
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseAntonyms;
|
late final List<Map<String, Object?>> senseAntonyms;
|
||||||
final Future<List<Map<String, Object?>>> senseantonymsQuery = connection
|
|
||||||
.rawQuery(
|
|
||||||
"""
|
|
||||||
SELECT
|
|
||||||
"${JMdictTableNames.senseAntonyms}".senseId,
|
|
||||||
"${JMdictTableNames.senseAntonyms}".ambiguous,
|
|
||||||
"${JMdictTableNames.senseAntonyms}".xrefEntryId,
|
|
||||||
"JMdict_BaseAndFurigana"."base",
|
|
||||||
"JMdict_BaseAndFurigana"."furigana"
|
|
||||||
FROM "${JMdictTableNames.senseAntonyms}"
|
|
||||||
JOIN "JMdict_BaseAndFurigana"
|
|
||||||
ON "${JMdictTableNames.senseAntonyms}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
|
|
||||||
WHERE
|
|
||||||
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
|
|
||||||
AND "JMdict_BaseAndFurigana"."isFirst"
|
|
||||||
ORDER BY
|
|
||||||
"${JMdictTableNames.senseAntonyms}"."senseId",
|
|
||||||
"${JMdictTableNames.senseAntonyms}"."xrefEntryId"
|
|
||||||
""",
|
|
||||||
[...senseIds],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseDialects;
|
late final List<Map<String, Object?>> senseDialects;
|
||||||
final Future<List<Map<String, Object?>>> sensedialectsQuery = connection
|
|
||||||
.query(
|
|
||||||
JMdictTableNames.senseDialect,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseFields;
|
late final List<Map<String, Object?>> senseFields;
|
||||||
final Future<List<Map<String, Object?>>> sensefieldsQuery = connection.query(
|
|
||||||
JMdictTableNames.senseField,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseGlossaries;
|
late final List<Map<String, Object?>> senseGlossaries;
|
||||||
final Future<List<Map<String, Object?>>> senseglossariesQuery = connection
|
|
||||||
.query(
|
|
||||||
JMdictTableNames.senseGlossary,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseInfos;
|
late final List<Map<String, Object?>> senseInfos;
|
||||||
final Future<List<Map<String, Object?>>> senseinfosQuery = connection.query(
|
|
||||||
JMdictTableNames.senseInfo,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseLanguageSources;
|
late final List<Map<String, Object?>> senseLanguageSources;
|
||||||
final Future<List<Map<String, Object?>>> senselanguagesourcesQuery =
|
|
||||||
connection.query(
|
|
||||||
JMdictTableNames.senseLanguageSource,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseMiscs;
|
late final List<Map<String, Object?>> senseMiscs;
|
||||||
final Future<List<Map<String, Object?>>> sensemiscsQuery = connection.query(
|
|
||||||
JMdictTableNames.senseMisc,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> sensePOSs;
|
late final List<Map<String, Object?>> sensePOSs;
|
||||||
final Future<List<Map<String, Object?>>> sensepossQuery = connection.query(
|
|
||||||
JMdictTableNames.sensePOS,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseRestrictedToKanjis;
|
late final List<Map<String, Object?>> senseRestrictedToKanjis;
|
||||||
final Future<List<Map<String, Object?>>> senserestrictedtokanjisQuery =
|
|
||||||
connection.query(
|
|
||||||
JMdictTableNames.senseRestrictedToKanji,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseRestrictedToReadings;
|
late final List<Map<String, Object?>> senseRestrictedToReadings;
|
||||||
final Future<List<Map<String, Object?>>> senserestrictedtoreadingsQuery =
|
|
||||||
connection.query(
|
|
||||||
JMdictTableNames.senseRestrictedToReading,
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> senseSeeAlsos;
|
late final List<Map<String, Object?>> senseSeeAlsos;
|
||||||
final Future<List<Map<String, Object?>>> senseseealsosQuery = connection
|
|
||||||
.rawQuery(
|
|
||||||
"""
|
|
||||||
SELECT
|
|
||||||
"${JMdictTableNames.senseSeeAlso}"."senseId",
|
|
||||||
"${JMdictTableNames.senseSeeAlso}"."ambiguous",
|
|
||||||
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId",
|
|
||||||
"JMdict_BaseAndFurigana"."base",
|
|
||||||
"JMdict_BaseAndFurigana"."furigana"
|
|
||||||
FROM "${JMdictTableNames.senseSeeAlso}"
|
|
||||||
JOIN "JMdict_BaseAndFurigana"
|
|
||||||
ON "${JMdictTableNames.senseSeeAlso}"."xrefEntryId" = "JMdict_BaseAndFurigana"."entryId"
|
|
||||||
WHERE
|
|
||||||
"senseId" IN (${List.filled(senseIds.length, '?').join(',')})
|
|
||||||
AND "JMdict_BaseAndFurigana"."isFirst"
|
|
||||||
ORDER BY
|
|
||||||
"${JMdictTableNames.senseSeeAlso}"."senseId",
|
|
||||||
"${JMdictTableNames.senseSeeAlso}"."xrefEntryId"
|
|
||||||
""",
|
|
||||||
[...senseIds],
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> exampleSentences;
|
late final List<Map<String, Object?>> exampleSentences;
|
||||||
final Future<List<Map<String, Object?>>> examplesentencesQuery = connection
|
|
||||||
.query(
|
|
||||||
'JMdict_ExampleSentence',
|
|
||||||
where: 'senseId IN (${List.filled(senseIds.length, '?').join(',')})',
|
|
||||||
whereArgs: senseIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Reading queries
|
|
||||||
|
|
||||||
final readingIds = readingElements
|
final readingIds = readingElements
|
||||||
.map((element) => element['elementId'] as int)
|
.map((element) => element['elementId'] as int)
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
late final List<Map<String, Object?>> readingElementInfos;
|
|
||||||
final Future<List<Map<String, Object?>>> readingelementinfosQuery =
|
|
||||||
connection.query(
|
|
||||||
JMdictTableNames.readingInfo,
|
|
||||||
where:
|
|
||||||
'(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
|
|
||||||
whereArgs: readingIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
late final List<Map<String, Object?>> readingElementRestrictions;
|
|
||||||
final Future<List<Map<String, Object?>>> readingelementrestrictionsQuery =
|
|
||||||
connection.query(
|
|
||||||
JMdictTableNames.readingRestriction,
|
|
||||||
where:
|
|
||||||
'(elementId) IN (${List.filled(readingIds.length, '?').join(',')})',
|
|
||||||
whereArgs: readingIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Kanji queries
|
|
||||||
|
|
||||||
final kanjiIds = kanjiElements
|
final kanjiIds = kanjiElements
|
||||||
.map((element) => element['elementId'] as int)
|
.map((element) => element['elementId'] as int)
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
|
late final List<Map<String, Object?>> readingElementInfos;
|
||||||
|
late final List<Map<String, Object?>> readingElementRestrictions;
|
||||||
|
|
||||||
late final List<Map<String, Object?>> kanjiElementInfos;
|
late final List<Map<String, Object?>> kanjiElementInfos;
|
||||||
final Future<List<Map<String, Object?>>> kanjielementinfosQuery = connection
|
|
||||||
.query(
|
|
||||||
JMdictTableNames.kanjiInfo,
|
|
||||||
where:
|
|
||||||
'(elementId) IN (${List.filled(kanjiIds.length, '?').join(',')})',
|
|
||||||
whereArgs: kanjiIds,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Xref data queries
|
// Xref data queries
|
||||||
await Future.wait([
|
await Future.wait([
|
||||||
senseantonymsQuery.then((value) => senseAntonyms = value),
|
_senseantonymsQuery(
|
||||||
senseseealsosQuery.then((value) => senseSeeAlsos = value),
|
connection,
|
||||||
|
senseIds,
|
||||||
|
).then((value) => senseAntonyms = value),
|
||||||
|
_senseseealsosQuery(
|
||||||
|
connection,
|
||||||
|
senseIds,
|
||||||
|
).then((value) => senseSeeAlsos = value),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
late final LinearWordQueryData? senseAntonymData;
|
LinearWordQueryData? senseAntonymData;
|
||||||
final Future<LinearWordQueryData?> senseantonymdataQuery =
|
LinearWordQueryData? senseSeeAlsoData;
|
||||||
fetchXrefData
|
|
||||||
? fetchLinearWordQueryData(
|
|
||||||
connection,
|
|
||||||
senseAntonyms
|
|
||||||
.map((antonym) => antonym['xrefEntryId'] as int)
|
|
||||||
.toList(),
|
|
||||||
fetchXrefData: false,
|
|
||||||
)
|
|
||||||
: Future.value(null);
|
|
||||||
|
|
||||||
late final LinearWordQueryData? senseSeeAlsoData;
|
|
||||||
final Future<LinearWordQueryData?> senseseealsodataQuery =
|
|
||||||
fetchXrefData
|
|
||||||
? fetchLinearWordQueryData(
|
|
||||||
connection,
|
|
||||||
senseSeeAlsos.map((seeAlso) => seeAlso['xrefEntryId'] as int).toList(),
|
|
||||||
fetchXrefData: false,
|
|
||||||
)
|
|
||||||
: Future.value(null);
|
|
||||||
|
|
||||||
await Future.wait([
|
await Future.wait([
|
||||||
sensedialectsQuery.then((value) => senseDialects = value),
|
_sensedialectsQuery(
|
||||||
sensefieldsQuery.then((value) => senseFields = value),
|
connection,
|
||||||
senseglossariesQuery.then((value) => senseGlossaries = value),
|
senseIds,
|
||||||
senseinfosQuery.then((value) => senseInfos = value),
|
).then((value) => senseDialects = value),
|
||||||
senselanguagesourcesQuery.then((value) => senseLanguageSources = value),
|
_sensefieldsQuery(
|
||||||
sensemiscsQuery.then((value) => senseMiscs = value),
|
connection,
|
||||||
sensepossQuery.then((value) => sensePOSs = value),
|
senseIds,
|
||||||
senserestrictedtokanjisQuery.then(
|
).then((value) => senseFields = value),
|
||||||
(value) => senseRestrictedToKanjis = value,
|
_senseglossariesQuery(
|
||||||
),
|
connection,
|
||||||
senserestrictedtoreadingsQuery.then(
|
senseIds,
|
||||||
(value) => senseRestrictedToReadings = value,
|
).then((value) => senseGlossaries = value),
|
||||||
),
|
_senseinfosQuery(connection, senseIds).then((value) => senseInfos = value),
|
||||||
examplesentencesQuery.then((value) => exampleSentences = value),
|
_senselanguagesourcesQuery(
|
||||||
readingelementinfosQuery.then((value) => readingElementInfos = value),
|
connection,
|
||||||
readingelementrestrictionsQuery.then(
|
senseIds,
|
||||||
(value) => readingElementRestrictions = value,
|
).then((value) => senseLanguageSources = value),
|
||||||
),
|
_sensemiscsQuery(connection, senseIds).then((value) => senseMiscs = value),
|
||||||
kanjielementinfosQuery.then((value) => kanjiElementInfos = value),
|
_sensepossQuery(connection, senseIds).then((value) => sensePOSs = value),
|
||||||
senseantonymdataQuery.then((value) => senseAntonymData = value),
|
_senserestrictedtokanjisQuery(
|
||||||
senseseealsodataQuery.then((value) => senseSeeAlsoData = value),
|
connection,
|
||||||
|
senseIds,
|
||||||
|
).then((value) => senseRestrictedToKanjis = value),
|
||||||
|
_senserestrictedtoreadingsQuery(
|
||||||
|
connection,
|
||||||
|
senseIds,
|
||||||
|
).then((value) => senseRestrictedToReadings = value),
|
||||||
|
_examplesentencesQuery(
|
||||||
|
connection,
|
||||||
|
senseIds,
|
||||||
|
).then((value) => exampleSentences = value),
|
||||||
|
_readingelementinfosQuery(
|
||||||
|
connection,
|
||||||
|
readingIds,
|
||||||
|
).then((value) => readingElementInfos = value),
|
||||||
|
_readingelementrestrictionsQuery(
|
||||||
|
connection,
|
||||||
|
readingIds,
|
||||||
|
).then((value) => readingElementRestrictions = value),
|
||||||
|
_kanjielementinfosQuery(
|
||||||
|
connection,
|
||||||
|
kanjiIds,
|
||||||
|
).then((value) => kanjiElementInfos = value),
|
||||||
|
|
||||||
|
if (fetchXrefData)
|
||||||
|
_senseantonymdataQuery(
|
||||||
|
connection,
|
||||||
|
senseAntonyms.map((antonym) => antonym['xrefEntryId'] as int).toList(),
|
||||||
|
).then((value) => senseAntonymData = value),
|
||||||
|
|
||||||
|
if (fetchXrefData)
|
||||||
|
_senseseealsodataQuery(
|
||||||
|
connection,
|
||||||
|
senseSeeAlsos.map((seeAlso) => seeAlso['xrefEntryId'] as int).toList(),
|
||||||
|
).then((value) => senseSeeAlsoData = value),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return LinearWordQueryData(
|
return LinearWordQueryData(
|
||||||
|
|||||||
@@ -15,15 +15,15 @@ SearchMode _determineSearchMode(String word) {
|
|||||||
final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word);
|
final bool containsAscii = RegExp(r'[A-Za-z]').hasMatch(word);
|
||||||
|
|
||||||
if (containsKanji && containsAscii) {
|
if (containsKanji && containsAscii) {
|
||||||
return SearchMode.MixedKanji;
|
return SearchMode.mixedKanji;
|
||||||
} else if (containsKanji) {
|
} else if (containsKanji) {
|
||||||
return SearchMode.Kanji;
|
return SearchMode.kanji;
|
||||||
} else if (containsAscii) {
|
} else if (containsAscii) {
|
||||||
return SearchMode.English;
|
return SearchMode.english;
|
||||||
} else if (word.contains(hiraganaRegex) || word.contains(katakanaRegex)) {
|
} else if (word.contains(hiraganaRegex) || word.contains(katakanaRegex)) {
|
||||||
return SearchMode.Kana;
|
return SearchMode.kana;
|
||||||
} else {
|
} else {
|
||||||
return SearchMode.MixedKana;
|
return SearchMode.mixedKana;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -199,23 +199,23 @@ Future<List<ScoredEntryId>> _queryEnglish(
|
|||||||
SELECT
|
SELECT
|
||||||
"${JMdictTableNames.sense}"."entryId",
|
"${JMdictTableNames.sense}"."entryId",
|
||||||
MAX("JMdict_EntryScore"."score")
|
MAX("JMdict_EntryScore"."score")
|
||||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
|
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 1) * 50)
|
||||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ? AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
|
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1 AND "${JMdictTableNames.sense}"."orderNum" = 2) * 30)
|
||||||
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?) * 20)
|
+ (("${JMdictTableNames.senseGlossary}"."phrase" = ?1) * 20)
|
||||||
as "score"
|
as "score"
|
||||||
FROM "${JMdictTableNames.senseGlossary}"
|
FROM "${JMdictTableNames.senseGlossary}"
|
||||||
JOIN "${JMdictTableNames.sense}" USING ("senseId")
|
JOIN "${JMdictTableNames.sense}" USING ("senseId")
|
||||||
JOIN "JMdict_EntryScore" USING ("entryId")
|
JOIN "JMdict_EntryScore" USING ("entryId")
|
||||||
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?
|
WHERE "${JMdictTableNames.senseGlossary}"."phrase" LIKE ?2
|
||||||
GROUP BY "JMdict_EntryScore"."entryId"
|
GROUP BY "JMdict_EntryScore"."entryId"
|
||||||
ORDER BY
|
ORDER BY
|
||||||
"score" DESC,
|
"score" DESC,
|
||||||
"${JMdictTableNames.sense}"."entryId" ASC
|
"${JMdictTableNames.sense}"."entryId" ASC
|
||||||
LIMIT ?
|
${pageSize != null ? 'LIMIT ?3' : ''}
|
||||||
OFFSET ?
|
${offset != null ? 'OFFSET ?4' : ''}
|
||||||
'''
|
'''
|
||||||
.trim(),
|
.trim(),
|
||||||
[word, word, word, '%${word.replaceAll('%', '')}%', pageSize, offset],
|
[word, '%${word.replaceAll('%', '')}%', if (pageSize != null) pageSize, if (offset != null) offset],
|
||||||
);
|
);
|
||||||
|
|
||||||
return result
|
return result
|
||||||
@@ -246,7 +246,7 @@ Future<List<ScoredEntryId>> fetchEntryIds(
|
|||||||
int? pageSize,
|
int? pageSize,
|
||||||
int? offset,
|
int? offset,
|
||||||
) async {
|
) async {
|
||||||
if (searchMode == SearchMode.Auto) {
|
if (searchMode == SearchMode.auto) {
|
||||||
searchMode = _determineSearchMode(word);
|
searchMode = _determineSearchMode(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -254,20 +254,20 @@ Future<List<ScoredEntryId>> fetchEntryIds(
|
|||||||
|
|
||||||
late final List<ScoredEntryId> entryIds;
|
late final List<ScoredEntryId> entryIds;
|
||||||
switch (searchMode) {
|
switch (searchMode) {
|
||||||
case SearchMode.Kanji:
|
case SearchMode.kanji:
|
||||||
entryIds = await _queryKanji(connection, word, pageSize, offset);
|
entryIds = await _queryKanji(connection, word, pageSize, offset);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SearchMode.Kana:
|
case SearchMode.kana:
|
||||||
entryIds = await _queryKana(connection, word, pageSize, offset);
|
entryIds = await _queryKana(connection, word, pageSize, offset);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SearchMode.English:
|
case SearchMode.english:
|
||||||
entryIds = await _queryEnglish(connection, word, pageSize, offset);
|
entryIds = await _queryEnglish(connection, word, pageSize, offset);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SearchMode.MixedKana:
|
case SearchMode.mixedKana:
|
||||||
case SearchMode.MixedKanji:
|
case SearchMode.mixedKanji:
|
||||||
default:
|
default:
|
||||||
throw UnimplementedError('Search mode $searchMode is not implemented');
|
throw UnimplementedError('Search mode $searchMode is not implemented');
|
||||||
}
|
}
|
||||||
@@ -280,7 +280,7 @@ Future<int?> fetchEntryIdCount(
|
|||||||
String word,
|
String word,
|
||||||
SearchMode searchMode,
|
SearchMode searchMode,
|
||||||
) async {
|
) async {
|
||||||
if (searchMode == SearchMode.Auto) {
|
if (searchMode == SearchMode.auto) {
|
||||||
searchMode = _determineSearchMode(word);
|
searchMode = _determineSearchMode(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -289,20 +289,20 @@ Future<int?> fetchEntryIdCount(
|
|||||||
late final int? entryIdCount;
|
late final int? entryIdCount;
|
||||||
|
|
||||||
switch (searchMode) {
|
switch (searchMode) {
|
||||||
case SearchMode.Kanji:
|
case SearchMode.kanji:
|
||||||
entryIdCount = await _queryKanjiCount(connection, word);
|
entryIdCount = await _queryKanjiCount(connection, word);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SearchMode.Kana:
|
case SearchMode.kana:
|
||||||
entryIdCount = await _queryKanaCount(connection, word);
|
entryIdCount = await _queryKanaCount(connection, word);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SearchMode.English:
|
case SearchMode.english:
|
||||||
entryIdCount = await _queryEnglishCount(connection, word);
|
entryIdCount = await _queryEnglishCount(connection, word);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SearchMode.MixedKana:
|
case SearchMode.mixedKana:
|
||||||
case SearchMode.MixedKanji:
|
case SearchMode.mixedKanji:
|
||||||
default:
|
default:
|
||||||
throw UnimplementedError('Search mode $searchMode is not implemented');
|
throw UnimplementedError('Search mode $searchMode is not implemented');
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,50 +21,84 @@ List<WordSearchResult> regroupWordSearchResults({
|
|||||||
}) {
|
}) {
|
||||||
final List<WordSearchResult> results = [];
|
final List<WordSearchResult> results = [];
|
||||||
|
|
||||||
final commonEntryIds = linearWordQueryData.commonEntries
|
final Set<int> commonEntryIds = linearWordQueryData.commonEntries
|
||||||
.map((entry) => entry['entryId'] as int)
|
.map((entry) => entry['entryId'] as int)
|
||||||
.toSet();
|
.toSet();
|
||||||
|
|
||||||
|
final Map<int, List<Map<String, Object?>>> entryReadingElementsByEntryId =
|
||||||
|
linearWordQueryData.readingElements.groupListsBy(
|
||||||
|
(element) => element['entryId'] as int,
|
||||||
|
);
|
||||||
|
|
||||||
|
final Map<int, List<Map<String, Object?>>> entryKanjiElementsByEntryId =
|
||||||
|
linearWordQueryData.kanjiElements.groupListsBy(
|
||||||
|
(element) => element['entryId'] as int,
|
||||||
|
);
|
||||||
|
|
||||||
|
final Map<int, int> elementIdToEntryId = {
|
||||||
|
for (final element in linearWordQueryData.readingElements)
|
||||||
|
element['elementId'] as int: element['entryId'] as int,
|
||||||
|
for (final element in linearWordQueryData.kanjiElements)
|
||||||
|
element['elementId'] as int: element['entryId'] as int,
|
||||||
|
};
|
||||||
|
|
||||||
|
final Map<int, List<Map<String, Object?>>> entryReadingElementInfosByEntryId =
|
||||||
|
linearWordQueryData.readingElementInfos.groupListsBy(
|
||||||
|
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||||
|
);
|
||||||
|
|
||||||
|
final Map<int, List<Map<String, Object?>>> entryKanjiElementInfosByEntryId =
|
||||||
|
linearWordQueryData.kanjiElementInfos.groupListsBy(
|
||||||
|
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||||
|
);
|
||||||
|
|
||||||
|
final Map<int, List<Map<String, Object?>>>
|
||||||
|
entryReadingElementRestrictionsByEntryId = linearWordQueryData
|
||||||
|
.readingElementRestrictions
|
||||||
|
.groupListsBy(
|
||||||
|
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||||
|
);
|
||||||
|
|
||||||
|
final Map<int, JlptLevel> entryJlptTagsByEntryId = linearWordQueryData
|
||||||
|
.jlptTags
|
||||||
|
.groupSetsBy((element) => element['entryId'] as int)
|
||||||
|
.map(
|
||||||
|
(final key, final value) => MapEntry(
|
||||||
|
key,
|
||||||
|
value.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)).min,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
final Map<int, List<Map<String, Object?>>> entrySensesByEntryId =
|
||||||
|
linearWordQueryData.senses.groupListsBy(
|
||||||
|
(element) => element['entryId'] as int,
|
||||||
|
);
|
||||||
|
|
||||||
for (final scoredEntryId in entryIds) {
|
for (final scoredEntryId in entryIds) {
|
||||||
final List<Map<String, Object?>> entryReadingElements = linearWordQueryData
|
final List<Map<String, Object?>> entryReadingElements =
|
||||||
.readingElements
|
entryReadingElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
final List<Map<String, Object?>> entryKanjiElements =
|
||||||
.toList();
|
entryKanjiElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||||
|
final List<Map<String, Object?>> entryReadingElementInfos =
|
||||||
|
entryReadingElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||||
|
final List<Map<String, Object?>> entryKanjiElementInfos =
|
||||||
|
entryKanjiElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||||
|
final List<Map<String, Object?>> entryReadingElementRestrictions =
|
||||||
|
entryReadingElementRestrictionsByEntryId[scoredEntryId.entryId] ??
|
||||||
|
const [];
|
||||||
|
|
||||||
final List<Map<String, Object?>> entryKanjiElements = linearWordQueryData
|
final GroupedWordResult entryReadingElementsGrouped = _regroupWords(
|
||||||
.kanjiElements
|
|
||||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
final List<Map<String, Object?>> entryJlptTags = linearWordQueryData
|
|
||||||
.jlptTags
|
|
||||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
final jlptLevel =
|
|
||||||
entryJlptTags
|
|
||||||
.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
|
|
||||||
.sorted((a, b) => b.compareTo(a))
|
|
||||||
.firstOrNull ??
|
|
||||||
JlptLevel.none;
|
|
||||||
|
|
||||||
final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
|
||||||
|
|
||||||
final List<Map<String, Object?>> entrySenses = linearWordQueryData.senses
|
|
||||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
final GroupedWordResult entryReadingElementsGrouped = _regroup_words(
|
|
||||||
entryId: scoredEntryId.entryId,
|
|
||||||
readingElements: entryReadingElements,
|
|
||||||
kanjiElements: entryKanjiElements,
|
kanjiElements: entryKanjiElements,
|
||||||
readingElementInfos: linearWordQueryData.readingElementInfos,
|
kanjiElementInfos: entryKanjiElementInfos,
|
||||||
readingElementRestrictions:
|
readingElements: entryReadingElements,
|
||||||
linearWordQueryData.readingElementRestrictions,
|
readingElementInfos: entryReadingElementInfos,
|
||||||
kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
|
readingElementRestrictions: entryReadingElementRestrictions,
|
||||||
);
|
);
|
||||||
|
|
||||||
final List<WordSearchSense> entrySensesGrouped = _regroup_senses(
|
final List<Map<String, Object?>> entrySenses =
|
||||||
|
entrySensesByEntryId[scoredEntryId.entryId] ?? const [];
|
||||||
|
|
||||||
|
final List<WordSearchSense> entrySensesGrouped = _regroupSenses(
|
||||||
senses: entrySenses,
|
senses: entrySenses,
|
||||||
senseAntonyms: linearWordQueryData.senseAntonyms,
|
senseAntonyms: linearWordQueryData.senseAntonyms,
|
||||||
senseDialects: linearWordQueryData.senseDialects,
|
senseDialects: linearWordQueryData.senseDialects,
|
||||||
@@ -82,6 +116,10 @@ List<WordSearchResult> regroupWordSearchResults({
|
|||||||
senseAntonymsXrefData: linearWordQueryData.senseAntonymData,
|
senseAntonymsXrefData: linearWordQueryData.senseAntonymData,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
final bool isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||||
|
final JlptLevel jlptLevel =
|
||||||
|
entryJlptTagsByEntryId[scoredEntryId.entryId] ?? JlptLevel.none;
|
||||||
|
|
||||||
results.add(
|
results.add(
|
||||||
WordSearchResult(
|
WordSearchResult(
|
||||||
score: scoredEntryId.score,
|
score: scoredEntryId.score,
|
||||||
@@ -112,8 +150,7 @@ class GroupedWordResult {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
GroupedWordResult _regroup_words({
|
GroupedWordResult _regroupWords({
|
||||||
required int entryId,
|
|
||||||
required List<Map<String, Object?>> kanjiElements,
|
required List<Map<String, Object?>> kanjiElements,
|
||||||
required List<Map<String, Object?>> kanjiElementInfos,
|
required List<Map<String, Object?>> kanjiElementInfos,
|
||||||
required List<Map<String, Object?>> readingElements,
|
required List<Map<String, Object?>> readingElements,
|
||||||
@@ -122,36 +159,34 @@ GroupedWordResult _regroup_words({
|
|||||||
}) {
|
}) {
|
||||||
final List<WordSearchRuby> rubys = [];
|
final List<WordSearchRuby> rubys = [];
|
||||||
|
|
||||||
final kanjiElements_ = kanjiElements
|
final Map<int, Set<String>> readingElementRestrictionsSet =
|
||||||
.where((element) => element['entryId'] == entryId)
|
readingElementRestrictions
|
||||||
.toList();
|
.groupSetsBy((element) => element['elementId'] as int)
|
||||||
|
.map(
|
||||||
|
(key, value) => MapEntry(
|
||||||
|
key,
|
||||||
|
value.map((e) => e['restriction'] as String).toSet(),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
final readingElements_ = readingElements
|
// Construct a cartesian product of kanji + readings, with exceptions made for items marked in `restrictions`.
|
||||||
.where((element) => element['entryId'] == entryId)
|
for (final readingElement in readingElements) {
|
||||||
.toList();
|
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements.isEmpty) {
|
||||||
|
|
||||||
final readingElementRestrictions_ = readingElementRestrictions
|
|
||||||
.where((element) => element['entryId'] == entryId)
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
for (final readingElement in readingElements_) {
|
|
||||||
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
|
|
||||||
final ruby = WordSearchRuby(base: readingElement['reading'] as String);
|
final ruby = WordSearchRuby(base: readingElement['reading'] as String);
|
||||||
rubys.add(ruby);
|
rubys.add(ruby);
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final kanjiElement in kanjiElements_) {
|
for (final kanjiElement in kanjiElements) {
|
||||||
final kanji = kanjiElement['reading'] as String;
|
final kanji = kanjiElement['reading'] as String;
|
||||||
final reading = readingElement['reading'] as String;
|
final reading = readingElement['reading'] as String;
|
||||||
|
|
||||||
final restrictions = readingElementRestrictions_
|
// The 'restrictions' act as an allowlist, meaning that non-matching kanji elements should be ignored.
|
||||||
.where((element) => element['reading'] == reading)
|
final restrictions =
|
||||||
.toList();
|
readingElementRestrictionsSet[readingElement['elementId'] as int] ??
|
||||||
|
{};
|
||||||
if (restrictions.isNotEmpty &&
|
if (restrictions.isNotEmpty && !restrictions.contains(kanji)) {
|
||||||
!restrictions.any((element) => element['restriction'] == kanji)) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -160,42 +195,37 @@ GroupedWordResult _regroup_words({
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(rubys.isNotEmpty, 'No readings found for entryId: $entryId');
|
assert(
|
||||||
|
rubys.isNotEmpty,
|
||||||
|
'No readings found for entryId: ${kanjiElements.firstOrNull?['entryId'] ?? readingElements.firstOrNull?['entryId'] ?? '???'}',
|
||||||
|
);
|
||||||
|
|
||||||
final Map<int, String> readingElementIdsToReading = {
|
final Map<int, String> readingElementIdsToReading = {
|
||||||
for (final element in readingElements_)
|
for (final element in readingElements)
|
||||||
element['elementId'] as int: element['reading'] as String,
|
element['elementId'] as int: element['reading'] as String,
|
||||||
};
|
};
|
||||||
|
|
||||||
final Map<int, String> kanjiElementIdsToReading = {
|
final Map<int, String> kanjiElementIdsToReading = {
|
||||||
for (final element in kanjiElements_)
|
for (final element in kanjiElements)
|
||||||
element['elementId'] as int: element['reading'] as String,
|
element['elementId'] as int: element['reading'] as String,
|
||||||
};
|
};
|
||||||
|
|
||||||
final readingElementInfos_ = readingElementInfos
|
|
||||||
.where((element) => element['entryId'] == entryId)
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
final kanjiElementInfos_ = kanjiElementInfos
|
|
||||||
.where((element) => element['entryId'] == entryId)
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
return GroupedWordResult(
|
return GroupedWordResult(
|
||||||
rubys: rubys,
|
rubys: rubys,
|
||||||
readingInfos: {
|
readingInfos: {
|
||||||
for (final rei in readingElementInfos_)
|
for (final rei in readingElementInfos)
|
||||||
readingElementIdsToReading[rei['elementId'] as int]!:
|
readingElementIdsToReading[rei['elementId'] as int]!:
|
||||||
JMdictReadingInfo.fromId(rei['info'] as String),
|
JMdictReadingInfo.fromId(rei['info'] as String),
|
||||||
},
|
},
|
||||||
kanjiInfos: {
|
kanjiInfos: {
|
||||||
for (final kei in kanjiElementInfos_)
|
for (final kei in kanjiElementInfos)
|
||||||
kanjiElementIdsToReading[kei['elementId'] as int]!:
|
kanjiElementIdsToReading[kei['elementId'] as int]!:
|
||||||
JMdictKanjiInfo.fromId(kei['info'] as String),
|
JMdictKanjiInfo.fromId(kei['info'] as String),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<WordSearchSense> _regroup_senses({
|
List<WordSearchSense> _regroupSenses({
|
||||||
required List<Map<String, Object?>> senses,
|
required List<Map<String, Object?>> senses,
|
||||||
required List<Map<String, Object?>> senseAntonyms,
|
required List<Map<String, Object?>> senseAntonyms,
|
||||||
required List<Map<String, Object?>> senseDialects,
|
required List<Map<String, Object?>> senseDialects,
|
||||||
|
|||||||
@@ -13,13 +13,31 @@ import 'package:jadb/search/word_search/regrouping.dart';
|
|||||||
import 'package:jadb/table_names/jmdict.dart';
|
import 'package:jadb/table_names/jmdict.dart';
|
||||||
import 'package:sqflite_common/sqlite_api.dart';
|
import 'package:sqflite_common/sqlite_api.dart';
|
||||||
|
|
||||||
enum SearchMode { Auto, English, Kanji, MixedKanji, Kana, MixedKana }
|
enum SearchMode {
|
||||||
|
/// Try to autodetect what is being searched for
|
||||||
|
auto,
|
||||||
|
|
||||||
|
/// Search for english words
|
||||||
|
english,
|
||||||
|
|
||||||
|
/// Search for the kanji reading of a word
|
||||||
|
kanji,
|
||||||
|
|
||||||
|
/// Search for the kanji reading of a word, mixed in with kana/romaji
|
||||||
|
mixedKanji,
|
||||||
|
|
||||||
|
/// Search for the kana reading of a word
|
||||||
|
kana,
|
||||||
|
|
||||||
|
/// Search for the kana reading of a word, mixed in with romaji
|
||||||
|
mixedKana,
|
||||||
|
}
|
||||||
|
|
||||||
/// Searches for an input string, returning a list of results with their details. Returns null if the input string is empty.
|
/// Searches for an input string, returning a list of results with their details. Returns null if the input string is empty.
|
||||||
Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
||||||
DatabaseExecutor connection,
|
DatabaseExecutor connection,
|
||||||
String word, {
|
String word, {
|
||||||
SearchMode searchMode = SearchMode.Auto,
|
SearchMode searchMode = SearchMode.auto,
|
||||||
int page = 0,
|
int page = 0,
|
||||||
int? pageSize,
|
int? pageSize,
|
||||||
}) async {
|
}) async {
|
||||||
@@ -63,7 +81,7 @@ Future<List<WordSearchResult>?> searchWordWithDbConnection(
|
|||||||
Future<int?> searchWordCountWithDbConnection(
|
Future<int?> searchWordCountWithDbConnection(
|
||||||
DatabaseExecutor connection,
|
DatabaseExecutor connection,
|
||||||
String word, {
|
String word, {
|
||||||
SearchMode searchMode = SearchMode.Auto,
|
SearchMode searchMode = SearchMode.auto,
|
||||||
}) async {
|
}) async {
|
||||||
if (word.isEmpty) {
|
if (word.isEmpty) {
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
abstract class JMdictTableNames {
|
abstract class JMdictTableNames {
|
||||||
|
static const String version = 'JMdict_Version';
|
||||||
static const String entry = 'JMdict_Entry';
|
static const String entry = 'JMdict_Entry';
|
||||||
static const String kanjiElement = 'JMdict_KanjiElement';
|
static const String kanjiElement = 'JMdict_KanjiElement';
|
||||||
static const String kanjiInfo = 'JMdict_KanjiElementInfo';
|
static const String kanjiInfo = 'JMdict_KanjiElementInfo';
|
||||||
@@ -20,6 +21,7 @@ abstract class JMdictTableNames {
|
|||||||
static const String senseSeeAlso = 'JMdict_SenseSeeAlso';
|
static const String senseSeeAlso = 'JMdict_SenseSeeAlso';
|
||||||
|
|
||||||
static Set<String> get allTables => {
|
static Set<String> get allTables => {
|
||||||
|
version,
|
||||||
entry,
|
entry,
|
||||||
kanjiElement,
|
kanjiElement,
|
||||||
kanjiInfo,
|
kanjiInfo,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
abstract class KANJIDICTableNames {
|
abstract class KANJIDICTableNames {
|
||||||
|
static const String version = 'KANJIDIC_Version';
|
||||||
static const String character = 'KANJIDIC_Character';
|
static const String character = 'KANJIDIC_Character';
|
||||||
static const String radicalName = 'KANJIDIC_RadicalName';
|
static const String radicalName = 'KANJIDIC_RadicalName';
|
||||||
static const String codepoint = 'KANJIDIC_Codepoint';
|
static const String codepoint = 'KANJIDIC_Codepoint';
|
||||||
@@ -17,6 +18,7 @@ abstract class KANJIDICTableNames {
|
|||||||
static const String nanori = 'KANJIDIC_Nanori';
|
static const String nanori = 'KANJIDIC_Nanori';
|
||||||
|
|
||||||
static Set<String> get allTables => {
|
static Set<String> get allTables => {
|
||||||
|
version,
|
||||||
character,
|
character,
|
||||||
radicalName,
|
radicalName,
|
||||||
codepoint,
|
codepoint,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
abstract class RADKFILETableNames {
|
abstract class RADKFILETableNames {
|
||||||
|
static const String version = 'RADKFILE_Version';
|
||||||
static const String radkfile = 'RADKFILE';
|
static const String radkfile = 'RADKFILE';
|
||||||
|
|
||||||
static Set<String> get allTables => {radkfile};
|
static Set<String> get allTables => {version, radkfile};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
abstract class TanosJLPTTableNames {
|
abstract class TanosJLPTTableNames {
|
||||||
|
static const String version = 'JMdict_JLPT_Version';
|
||||||
static const String jlptTag = 'JMdict_JLPTTag';
|
static const String jlptTag = 'JMdict_JLPTTag';
|
||||||
|
|
||||||
static Set<String> get allTables => {jlptTag};
|
static Set<String> get allTables => {version, jlptTag};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import 'package:collection/collection.dart';
|
||||||
import 'package:jadb/util/lemmatizer/rules.dart';
|
import 'package:jadb/util/lemmatizer/rules.dart';
|
||||||
|
|
||||||
enum WordClass {
|
enum WordClass {
|
||||||
@@ -10,6 +11,8 @@ enum WordClass {
|
|||||||
adverb,
|
adverb,
|
||||||
particle,
|
particle,
|
||||||
input,
|
input,
|
||||||
|
|
||||||
|
// TODO: add toString and fromString so it can be parsed by the cli
|
||||||
}
|
}
|
||||||
|
|
||||||
enum LemmatizationRuleType { prefix, suffix }
|
enum LemmatizationRuleType { prefix, suffix }
|
||||||
@@ -18,7 +21,7 @@ class LemmatizationRule {
|
|||||||
final String name;
|
final String name;
|
||||||
final AllomorphPattern pattern;
|
final AllomorphPattern pattern;
|
||||||
final WordClass wordClass;
|
final WordClass wordClass;
|
||||||
final List<WordClass>? validChildClasses;
|
final Set<WordClass>? validChildClasses;
|
||||||
final bool terminal;
|
final bool terminal;
|
||||||
|
|
||||||
const LemmatizationRule({
|
const LemmatizationRule({
|
||||||
@@ -38,9 +41,9 @@ class LemmatizationRule {
|
|||||||
required String pattern,
|
required String pattern,
|
||||||
required String? replacement,
|
required String? replacement,
|
||||||
required WordClass wordClass,
|
required WordClass wordClass,
|
||||||
validChildClasses,
|
Set<WordClass>? validChildClasses,
|
||||||
terminal = false,
|
bool terminal = false,
|
||||||
lookAheadBehind = const [''],
|
List<Pattern> lookAheadBehind = const [''],
|
||||||
LemmatizationRuleType type = LemmatizationRuleType.suffix,
|
LemmatizationRuleType type = LemmatizationRuleType.suffix,
|
||||||
}) : this(
|
}) : this(
|
||||||
name: name,
|
name: name,
|
||||||
@@ -55,6 +58,27 @@ class LemmatizationRule {
|
|||||||
terminal: terminal,
|
terminal: terminal,
|
||||||
wordClass: wordClass,
|
wordClass: wordClass,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@override
|
||||||
|
int get hashCode => Object.hash(
|
||||||
|
name,
|
||||||
|
pattern,
|
||||||
|
wordClass,
|
||||||
|
validChildClasses,
|
||||||
|
terminal,
|
||||||
|
SetEquality().hash(validChildClasses),
|
||||||
|
);
|
||||||
|
|
||||||
|
@override
|
||||||
|
bool operator ==(Object other) {
|
||||||
|
if (identical(this, other)) return true;
|
||||||
|
return other is LemmatizationRule &&
|
||||||
|
other.name == name &&
|
||||||
|
other.pattern == pattern &&
|
||||||
|
other.wordClass == wordClass &&
|
||||||
|
other.terminal == terminal &&
|
||||||
|
SetEquality().equals(validChildClasses, other.validChildClasses);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Represents a set of patterns for matching allomorphs in a word.
|
/// Represents a set of patterns for matching allomorphs in a word.
|
||||||
@@ -71,6 +95,7 @@ class AllomorphPattern {
|
|||||||
this.lookAheadBehind = const [''],
|
this.lookAheadBehind = const [''],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/// Convert the [patterns] into regexes
|
||||||
List<(String, Pattern)> get allPatternCombinations {
|
List<(String, Pattern)> get allPatternCombinations {
|
||||||
final combinations = <(String, Pattern)>[];
|
final combinations = <(String, Pattern)>[];
|
||||||
for (final l in lookAheadBehind) {
|
for (final l in lookAheadBehind) {
|
||||||
@@ -94,6 +119,7 @@ class AllomorphPattern {
|
|||||||
return combinations;
|
return combinations;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check whether an input string matches any of the [patterns]
|
||||||
bool matches(String word) {
|
bool matches(String word) {
|
||||||
for (final (_, p) in allPatternCombinations) {
|
for (final (_, p) in allPatternCombinations) {
|
||||||
if (p is String) {
|
if (p is String) {
|
||||||
@@ -111,6 +137,9 @@ class AllomorphPattern {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Apply the replacement for this pattern.
|
||||||
|
///
|
||||||
|
/// If none of the [patterns] apply, this function returns `null`.
|
||||||
List<String>? apply(String word) {
|
List<String>? apply(String word) {
|
||||||
for (final (affix, p) in allPatternCombinations) {
|
for (final (affix, p) in allPatternCombinations) {
|
||||||
switch ((type, p is RegExp)) {
|
switch ((type, p is RegExp)) {
|
||||||
@@ -157,6 +186,22 @@ class AllomorphPattern {
|
|||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@override
|
||||||
|
int get hashCode => Object.hash(
|
||||||
|
type,
|
||||||
|
ListEquality().hash(lookAheadBehind),
|
||||||
|
MapEquality().hash(patterns),
|
||||||
|
);
|
||||||
|
|
||||||
|
@override
|
||||||
|
bool operator ==(Object other) {
|
||||||
|
if (identical(this, other)) return true;
|
||||||
|
return other is AllomorphPattern &&
|
||||||
|
other.type == type &&
|
||||||
|
ListEquality().equals(other.lookAheadBehind, lookAheadBehind) &&
|
||||||
|
MapEquality().equals(other.patterns, patterns);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class Lemmatized {
|
class Lemmatized {
|
||||||
@@ -203,9 +248,10 @@ List<Lemmatized> _lemmatize(LemmatizationRule parentRule, String word) {
|
|||||||
|
|
||||||
final filteredLemmatizationRules = parentRule.validChildClasses == null
|
final filteredLemmatizationRules = parentRule.validChildClasses == null
|
||||||
? lemmatizationRules
|
? lemmatizationRules
|
||||||
: lemmatizationRules.where(
|
: [
|
||||||
(r) => parentRule.validChildClasses!.contains(r.wordClass),
|
for (final wordClass in parentRule.validChildClasses!)
|
||||||
);
|
...lemmatizationRulesByWordClass[wordClass]!,
|
||||||
|
];
|
||||||
|
|
||||||
for (final rule in filteredLemmatizationRules) {
|
for (final rule in filteredLemmatizationRules) {
|
||||||
if (rule.matches(word)) {
|
if (rule.matches(word)) {
|
||||||
|
|||||||
@@ -1,10 +1,17 @@
|
|||||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||||
import 'package:jadb/util/lemmatizer/rules/godan-verbs.dart';
|
import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
|
||||||
import 'package:jadb/util/lemmatizer/rules/i-adjectives.dart';
|
import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart';
|
||||||
import 'package:jadb/util/lemmatizer/rules/ichidan-verbs.dart';
|
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
|
||||||
|
|
||||||
List<LemmatizationRule> lemmatizationRules = [
|
final List<LemmatizationRule> lemmatizationRules = List.unmodifiable([
|
||||||
...ichidanVerbLemmatizationRules,
|
...ichidanVerbLemmatizationRules,
|
||||||
...godanVerbLemmatizationRules,
|
...godanVerbLemmatizationRules,
|
||||||
...iAdjectiveLemmatizationRules,
|
...iAdjectiveLemmatizationRules,
|
||||||
];
|
]);
|
||||||
|
|
||||||
|
final Map<WordClass, List<LemmatizationRule>> lemmatizationRulesByWordClass =
|
||||||
|
Map.unmodifiable({
|
||||||
|
WordClass.ichidanVerb: ichidanVerbLemmatizationRules,
|
||||||
|
WordClass.iAdjective: iAdjectiveLemmatizationRules,
|
||||||
|
WordClass.godanVerb: godanVerbLemmatizationRules,
|
||||||
|
});
|
||||||
|
|||||||
@@ -1,457 +0,0 @@
|
|||||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
|
||||||
|
|
||||||
List<LemmatizationRule> godanVerbLemmatizationRules = [
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - base form',
|
|
||||||
terminal: true,
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'う': ['う'],
|
|
||||||
'く': ['く'],
|
|
||||||
'ぐ': ['ぐ'],
|
|
||||||
'す': ['す'],
|
|
||||||
'つ': ['つ'],
|
|
||||||
'ぬ': ['ぬ'],
|
|
||||||
'ぶ': ['ぶ'],
|
|
||||||
'む': ['む'],
|
|
||||||
'る': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'わない': ['う'],
|
|
||||||
'かない': ['く'],
|
|
||||||
'がない': ['ぐ'],
|
|
||||||
'さない': ['す'],
|
|
||||||
'たない': ['つ'],
|
|
||||||
'なない': ['ぬ'],
|
|
||||||
'ばない': ['ぶ'],
|
|
||||||
'まない': ['む'],
|
|
||||||
'らない': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - past form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'した': ['す'],
|
|
||||||
'った': ['る', 'つ', 'う'],
|
|
||||||
'んだ': ['む', 'ぬ', 'ぶ'],
|
|
||||||
'いだ': ['ぐ'],
|
|
||||||
'いた': ['く'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - te-form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'いて': ['く', 'ぐ'],
|
|
||||||
'して': ['す'],
|
|
||||||
'って': ['る', 'つ', 'う'],
|
|
||||||
'んで': ['む', 'ぬ', 'ぶ'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - te-form with いる',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'いている': ['く', 'ぐ'],
|
|
||||||
'している': ['す'],
|
|
||||||
'っている': ['る', 'つ', 'う'],
|
|
||||||
'んでいる': ['む', 'ぬ', 'ぶ'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - te-form with いた',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'いていた': ['く', 'ぐ'],
|
|
||||||
'していた': ['す'],
|
|
||||||
'っていた': ['る', 'つ', 'う'],
|
|
||||||
'んでいた': ['む', 'ぬ', 'ぶ'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - conditional form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'けば': ['く'],
|
|
||||||
'げば': ['ぐ'],
|
|
||||||
'せば': ['す'],
|
|
||||||
'てば': ['つ', 'る', 'う'],
|
|
||||||
'ねば': ['ぬ'],
|
|
||||||
'べば': ['ぶ'],
|
|
||||||
'めば': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - volitional form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'おう': ['う'],
|
|
||||||
'こう': ['く'],
|
|
||||||
'ごう': ['ぐ'],
|
|
||||||
'そう': ['す'],
|
|
||||||
'とう': ['つ', 'る', 'う'],
|
|
||||||
'のう': ['ぬ'],
|
|
||||||
'ぼう': ['ぶ'],
|
|
||||||
'もう': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - potential form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'ける': ['く'],
|
|
||||||
'げる': ['ぐ'],
|
|
||||||
'せる': ['す'],
|
|
||||||
'てる': ['つ', 'る', 'う'],
|
|
||||||
'ねる': ['ぬ'],
|
|
||||||
'べる': ['ぶ'],
|
|
||||||
'める': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - passive form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'かれる': ['く'],
|
|
||||||
'がれる': ['ぐ'],
|
|
||||||
'される': ['す'],
|
|
||||||
'たれる': ['つ', 'る', 'う'],
|
|
||||||
'なれる': ['ぬ'],
|
|
||||||
'ばれる': ['ぶ'],
|
|
||||||
'まれる': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - causative form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'かせる': ['く'],
|
|
||||||
'がせる': ['ぐ'],
|
|
||||||
'させる': ['す'],
|
|
||||||
'たせる': ['つ', 'る', 'う'],
|
|
||||||
'なせる': ['ぬ'],
|
|
||||||
'ばせる': ['ぶ'],
|
|
||||||
'ませる': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - causative-passive form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'かされる': ['く'],
|
|
||||||
'がされる': ['ぐ'],
|
|
||||||
'される': ['す'],
|
|
||||||
'たされる': ['つ', 'る', 'う'],
|
|
||||||
'なされる': ['ぬ'],
|
|
||||||
'ばされる': ['ぶ'],
|
|
||||||
'まされる': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - imperative form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'え': ['う'],
|
|
||||||
'け': ['く'],
|
|
||||||
'げ': ['ぐ'],
|
|
||||||
'せ': ['す'],
|
|
||||||
'て': ['つ', 'る', 'う'],
|
|
||||||
'ね': ['ぬ'],
|
|
||||||
'べ': ['ぶ'],
|
|
||||||
'め': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative past form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'わなかった': ['う'],
|
|
||||||
'かなかった': ['く'],
|
|
||||||
'がなかった': ['ぐ'],
|
|
||||||
'さなかった': ['す'],
|
|
||||||
'たなかった': ['つ'],
|
|
||||||
'ななかった': ['ぬ'],
|
|
||||||
'ばなかった': ['ぶ'],
|
|
||||||
'まなかった': ['む'],
|
|
||||||
'らなかった': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative te-form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'わなくて': ['う'],
|
|
||||||
'かなくて': ['く'],
|
|
||||||
'がなくて': ['ぐ'],
|
|
||||||
'さなくて': ['す'],
|
|
||||||
'たなくて': ['つ'],
|
|
||||||
'ななくて': ['ぬ'],
|
|
||||||
'ばなくて': ['ぶ'],
|
|
||||||
'まなくて': ['む'],
|
|
||||||
'らなくて': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative conditional form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'わなければ': ['う'],
|
|
||||||
'かなければ': ['く'],
|
|
||||||
'がなければ': ['ぐ'],
|
|
||||||
'さなければ': ['す'],
|
|
||||||
'たなければ': ['つ'],
|
|
||||||
'ななければ': ['ぬ'],
|
|
||||||
'ばなければ': ['ぶ'],
|
|
||||||
'まなければ': ['む'],
|
|
||||||
'らなければ': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative volitional form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'うまい': ['う'],
|
|
||||||
'くまい': ['く'],
|
|
||||||
'ぐまい': ['ぐ'],
|
|
||||||
'すまい': ['す'],
|
|
||||||
'つまい': ['つ', 'る', 'う'],
|
|
||||||
'ぬまい': ['ぬ'],
|
|
||||||
'ぶまい': ['ぶ'],
|
|
||||||
'むまい': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative potential form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'けない': ['く'],
|
|
||||||
'げない': ['ぐ'],
|
|
||||||
'せない': ['す'],
|
|
||||||
'てない': ['つ', 'る', 'う'],
|
|
||||||
'ねない': ['ぬ'],
|
|
||||||
'べない': ['ぶ'],
|
|
||||||
'めない': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative passive form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'かれない': ['く'],
|
|
||||||
'がれない': ['ぐ'],
|
|
||||||
'されない': ['す'],
|
|
||||||
'たれない': ['つ', 'る', 'う'],
|
|
||||||
'なれない': ['ぬ'],
|
|
||||||
'ばれない': ['ぶ'],
|
|
||||||
'まれない': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative causative form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'かせない': ['く'],
|
|
||||||
'がせない': ['ぐ'],
|
|
||||||
'させない': ['す'],
|
|
||||||
'たせない': ['つ', 'る', 'う'],
|
|
||||||
'なせない': ['ぬ'],
|
|
||||||
'ばせない': ['ぶ'],
|
|
||||||
'ませない': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative causative-passive form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'かされない': ['く'],
|
|
||||||
'がされない': ['ぐ'],
|
|
||||||
'されない': ['す'],
|
|
||||||
'たされない': ['つ', 'る', 'う'],
|
|
||||||
'なされない': ['ぬ'],
|
|
||||||
'ばされない': ['ぶ'],
|
|
||||||
'まされない': ['む'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative imperative form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'うな': ['う'],
|
|
||||||
'くな': ['く'],
|
|
||||||
'ぐな': ['ぐ'],
|
|
||||||
'すな': ['す'],
|
|
||||||
'つな': ['つ'],
|
|
||||||
'ぬな': ['ぬ'],
|
|
||||||
'ぶな': ['ぶ'],
|
|
||||||
'むな': ['む'],
|
|
||||||
'るな': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - desire form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'きたい': ['く'],
|
|
||||||
'ぎたい': ['ぐ'],
|
|
||||||
'したい': ['す'],
|
|
||||||
'ちたい': ['つ'],
|
|
||||||
'にたい': ['ぬ'],
|
|
||||||
'びたい': ['ぶ'],
|
|
||||||
'みたい': ['む'],
|
|
||||||
'りたい': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative desire form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'いたくない': ['う'],
|
|
||||||
'きたくない': ['く'],
|
|
||||||
'ぎたくない': ['ぐ'],
|
|
||||||
'したくない': ['す'],
|
|
||||||
'ちたくない': ['つ'],
|
|
||||||
'にたくない': ['ぬ'],
|
|
||||||
'びたくない': ['ぶ'],
|
|
||||||
'みたくない': ['む'],
|
|
||||||
'りたくない': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - past desire form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'きたかった': ['く'],
|
|
||||||
'ぎたかった': ['ぐ'],
|
|
||||||
'したかった': ['す'],
|
|
||||||
'ちたかった': ['つ'],
|
|
||||||
'にたかった': ['ぬ'],
|
|
||||||
'びたかった': ['ぶ'],
|
|
||||||
'みたかった': ['む'],
|
|
||||||
'りたかった': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
LemmatizationRule(
|
|
||||||
name: 'Godan verb - negative past desire form',
|
|
||||||
pattern: AllomorphPattern(
|
|
||||||
patterns: {
|
|
||||||
'いたくなかった': ['う'],
|
|
||||||
'きたくなかった': ['く'],
|
|
||||||
'ぎたくなかった': ['ぐ'],
|
|
||||||
'したくなかった': ['す'],
|
|
||||||
'ちたくなかった': ['つ'],
|
|
||||||
'にたくなかった': ['ぬ'],
|
|
||||||
'びたくなかった': ['ぶ'],
|
|
||||||
'みたくなかった': ['む'],
|
|
||||||
'りたくなかった': ['る'],
|
|
||||||
},
|
|
||||||
type: LemmatizationRuleType.suffix,
|
|
||||||
),
|
|
||||||
validChildClasses: [WordClass.godanVerb],
|
|
||||||
wordClass: WordClass.godanVerb,
|
|
||||||
),
|
|
||||||
];
|
|
||||||
509
lib/util/lemmatizer/rules/godan_verbs.dart
Normal file
509
lib/util/lemmatizer/rules/godan_verbs.dart
Normal file
@@ -0,0 +1,509 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||||
|
|
||||||
|
/// Godan verb in its plain (dictionary) form.
///
/// Marked `terminal`; each ending in the table maps back onto itself.
final LemmatizationRule godanVerbBase = LemmatizationRule(
  name: 'Godan verb - base form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  terminal: true,
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'う': ['う'],
      'く': ['く'],
      'ぐ': ['ぐ'],
      'す': ['す'],
      'つ': ['つ'],
      'ぬ': ['ぬ'],
      'ぶ': ['ぶ'],
      'む': ['む'],
      'る': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, plain negative (-ない).
///
/// Keys are inflected endings; values are the dictionary-form endings they
/// stand for.
final LemmatizationRule godanVerbNegative = LemmatizationRule(
  name: 'Godan verb - negative form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わない': ['う'],
      'かない': ['く'],
      'がない': ['ぐ'],
      'さない': ['す'],
      'たない': ['つ'],
      'なない': ['ぬ'],
      'ばない': ['ぶ'],
      'まない': ['む'],
      'らない': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, plain past (-た / -だ).
///
/// Several dictionary endings collapse into one past ending, so a key may
/// list more than one candidate.
final LemmatizationRule godanVerbPast = LemmatizationRule(
  name: 'Godan verb - past form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'した': ['す'],
      'った': ['る', 'つ', 'う'],
      'んだ': ['む', 'ぬ', 'ぶ'],
      'いだ': ['ぐ'],
      'いた': ['く'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, te-form (-て / -で).
///
/// Keys are inflected endings; values are the candidate dictionary-form
/// endings. ぐ-verbs voice the ending (泳ぐ → 泳いで), so they get their own
/// `いで` key instead of sharing `いて` with く-verbs; this mirrors the
/// `いだ` / `いた` split in the past-form rule.
final LemmatizationRule godanVerbTe = LemmatizationRule(
  name: 'Godan verb - te-form',
  pattern: AllomorphPattern(
    patterns: {
      'いて': ['く'],
      'いで': ['ぐ'],
      'して': ['す'],
      'って': ['る', 'つ', 'う'],
      'んで': ['む', 'ぬ', 'ぶ'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, te-form followed by いる.
///
/// Keys are inflected endings; values are the candidate dictionary-form
/// endings. ぐ-verbs take the voiced `いでいる` (泳ぐ → 泳いでいる), so `いている`
/// maps to く only.
final LemmatizationRule godanVerbTeiru = LemmatizationRule(
  name: 'Godan verb - te-form with いる',
  pattern: AllomorphPattern(
    patterns: {
      'いている': ['く'],
      'いでいる': ['ぐ'],
      'している': ['す'],
      'っている': ['る', 'つ', 'う'],
      'んでいる': ['む', 'ぬ', 'ぶ'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, te-form followed by いた.
///
/// Keys are inflected endings; values are the candidate dictionary-form
/// endings. ぐ-verbs take the voiced `いでいた` (泳ぐ → 泳いでいた), so `いていた`
/// maps to く only.
final LemmatizationRule godanVerbTeita = LemmatizationRule(
  name: 'Godan verb - te-form with いた',
  pattern: AllomorphPattern(
    patterns: {
      'いていた': ['く'],
      'いでいた': ['ぐ'],
      'していた': ['す'],
      'っていた': ['る', 'つ', 'う'],
      'んでいた': ['む', 'ぬ', 'ぶ'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, conditional (-ば).
///
/// The conditional is built on the e-row of each ending, so every dictionary
/// ending has its own key: `てば` belongs to つ-verbs only, while う- and
/// る-verbs use `えば` and `れば` (買えば, 取れば).
final LemmatizationRule godanVerbConditional = LemmatizationRule(
  name: 'Godan verb - conditional form',
  pattern: AllomorphPattern(
    patterns: {
      'えば': ['う'],
      'けば': ['く'],
      'げば': ['ぐ'],
      'せば': ['す'],
      'てば': ['つ'],
      'ねば': ['ぬ'],
      'べば': ['ぶ'],
      'めば': ['む'],
      'れば': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, volitional (-おう).
///
/// The volitional is built on the o-row of each ending: `とう` belongs to
/// つ-verbs only, う-verbs are covered by `おう`, and る-verbs use `ろう`
/// (取ろう).
final LemmatizationRule godanVerbVolitional = LemmatizationRule(
  name: 'Godan verb - volitional form',
  pattern: AllomorphPattern(
    patterns: {
      'おう': ['う'],
      'こう': ['く'],
      'ごう': ['ぐ'],
      'そう': ['す'],
      'とう': ['つ'],
      'のう': ['ぬ'],
      'ぼう': ['ぶ'],
      'もう': ['む'],
      'ろう': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, potential (e-row + る).
///
/// Each dictionary ending has a distinct potential ending: `てる` belongs to
/// つ-verbs only, while う- and る-verbs use `える` and `れる` (買える, 取れる).
final LemmatizationRule godanVerbPotential = LemmatizationRule(
  name: 'Godan verb - potential form',
  pattern: AllomorphPattern(
    patterns: {
      'える': ['う'],
      'ける': ['く'],
      'げる': ['ぐ'],
      'せる': ['す'],
      'てる': ['つ'],
      'ねる': ['ぬ'],
      'べる': ['ぶ'],
      'める': ['む'],
      'れる': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, passive (a-row + れる).
///
/// Each dictionary ending has a distinct passive ending: `たれる` belongs to
/// つ-verbs only, while う- and る-verbs use `われる` and `られる`
/// (買われる, 取られる).
final LemmatizationRule godanVerbPassive = LemmatizationRule(
  name: 'Godan verb - passive form',
  pattern: AllomorphPattern(
    patterns: {
      'われる': ['う'],
      'かれる': ['く'],
      'がれる': ['ぐ'],
      'される': ['す'],
      'たれる': ['つ'],
      'なれる': ['ぬ'],
      'ばれる': ['ぶ'],
      'まれる': ['む'],
      'られる': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, causative (a-row + せる).
///
/// Each dictionary ending has a distinct causative ending: `たせる` belongs
/// to つ-verbs only, while う- and る-verbs use `わせる` and `らせる`
/// (買わせる, 取らせる).
final LemmatizationRule godanVerbCausative = LemmatizationRule(
  name: 'Godan verb - causative form',
  pattern: AllomorphPattern(
    patterns: {
      'わせる': ['う'],
      'かせる': ['く'],
      'がせる': ['ぐ'],
      'させる': ['す'],
      'たせる': ['つ'],
      'なせる': ['ぬ'],
      'ばせる': ['ぶ'],
      'ませる': ['む'],
      'らせる': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, causative-passive (short form, a-row + される).
///
/// す-verbs have no short form and use `させられる` (話させられる). A bare
/// `される` key would be the plain passive of す-verbs, and it is also a
/// suffix of every other key in this table, so it must not appear here.
/// `たされる` belongs to つ-verbs only; う- and る-verbs use `わされる` and
/// `らされる`.
final LemmatizationRule godanVerbCausativePassive = LemmatizationRule(
  name: 'Godan verb - causative-passive form',
  pattern: AllomorphPattern(
    patterns: {
      'わされる': ['う'],
      'かされる': ['く'],
      'がされる': ['ぐ'],
      'させられる': ['す'],
      'たされる': ['つ'],
      'なされる': ['ぬ'],
      'ばされる': ['ぶ'],
      'まされる': ['む'],
      'らされる': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, imperative (e-row ending).
///
/// Each dictionary ending has a distinct imperative ending: `て` belongs to
/// つ-verbs only, う-verbs are covered by `え`, and る-verbs use `れ` (取れ).
final LemmatizationRule godanVerbImperative = LemmatizationRule(
  name: 'Godan verb - imperative form',
  pattern: AllomorphPattern(
    patterns: {
      'え': ['う'],
      'け': ['く'],
      'げ': ['ぐ'],
      'せ': ['す'],
      'て': ['つ'],
      'ね': ['ぬ'],
      'べ': ['ぶ'],
      'め': ['む'],
      'れ': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative past (-なかった).
///
/// Keys are inflected endings; values are the dictionary-form endings they
/// stand for.
final LemmatizationRule godanVerbNegativePast = LemmatizationRule(
  name: 'Godan verb - negative past form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わなかった': ['う'],
      'かなかった': ['く'],
      'がなかった': ['ぐ'],
      'さなかった': ['す'],
      'たなかった': ['つ'],
      'ななかった': ['ぬ'],
      'ばなかった': ['ぶ'],
      'まなかった': ['む'],
      'らなかった': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, negative te-form (-なくて).
///
/// Keys are inflected endings; values are the dictionary-form endings they
/// stand for.
final LemmatizationRule godanVerbNegativeTe = LemmatizationRule(
  name: 'Godan verb - negative te-form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わなくて': ['う'],
      'かなくて': ['く'],
      'がなくて': ['ぐ'],
      'さなくて': ['す'],
      'たなくて': ['つ'],
      'ななくて': ['ぬ'],
      'ばなくて': ['ぶ'],
      'まなくて': ['む'],
      'らなくて': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, negative conditional (-なければ).
///
/// Keys are inflected endings; values are the dictionary-form endings they
/// stand for.
final LemmatizationRule godanVerbNegativeConditional = LemmatizationRule(
  name: 'Godan verb - negative conditional form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'わなければ': ['う'],
      'かなければ': ['く'],
      'がなければ': ['ぐ'],
      'さなければ': ['す'],
      'たなければ': ['つ'],
      'ななければ': ['ぬ'],
      'ばなければ': ['ぶ'],
      'まなければ': ['む'],
      'らなければ': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, negative volitional (dictionary form + まい).
///
/// まい attaches to the unchanged dictionary ending, so each key maps to
/// exactly one ending: `つまい` belongs to つ-verbs only, う-verbs are
/// covered by `うまい`, and る-verbs use `るまい`.
final LemmatizationRule godanVerbNegativeVolitional = LemmatizationRule(
  name: 'Godan verb - negative volitional form',
  pattern: AllomorphPattern(
    patterns: {
      'うまい': ['う'],
      'くまい': ['く'],
      'ぐまい': ['ぐ'],
      'すまい': ['す'],
      'つまい': ['つ'],
      'ぬまい': ['ぬ'],
      'ぶまい': ['ぶ'],
      'むまい': ['む'],
      'るまい': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative potential (e-row + ない).
///
/// Each dictionary ending has a distinct ending here: `てない` belongs to
/// つ-verbs only, while う- and る-verbs use `えない` and `れない`
/// (買えない, 取れない).
final LemmatizationRule godanVerbNegativePotential = LemmatizationRule(
  name: 'Godan verb - negative potential form',
  pattern: AllomorphPattern(
    patterns: {
      'えない': ['う'],
      'けない': ['く'],
      'げない': ['ぐ'],
      'せない': ['す'],
      'てない': ['つ'],
      'ねない': ['ぬ'],
      'べない': ['ぶ'],
      'めない': ['む'],
      'れない': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative passive (a-row + れない).
///
/// Each dictionary ending has a distinct ending here: `たれない` belongs to
/// つ-verbs only, while う- and る-verbs use `われない` and `られない`.
final LemmatizationRule godanVerbNegativePassive = LemmatizationRule(
  name: 'Godan verb - negative passive form',
  pattern: AllomorphPattern(
    patterns: {
      'われない': ['う'],
      'かれない': ['く'],
      'がれない': ['ぐ'],
      'されない': ['す'],
      'たれない': ['つ'],
      'なれない': ['ぬ'],
      'ばれない': ['ぶ'],
      'まれない': ['む'],
      'られない': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative causative (a-row + せない).
///
/// Each dictionary ending has a distinct ending here: `たせない` belongs to
/// つ-verbs only, while う- and る-verbs use `わせない` and `らせない`.
final LemmatizationRule godanVerbNegativeCausative = LemmatizationRule(
  name: 'Godan verb - negative causative form',
  pattern: AllomorphPattern(
    patterns: {
      'わせない': ['う'],
      'かせない': ['く'],
      'がせない': ['ぐ'],
      'させない': ['す'],
      'たせない': ['つ'],
      'なせない': ['ぬ'],
      'ばせない': ['ぶ'],
      'ませない': ['む'],
      'らせない': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative causative-passive (short form, a-row + されない).
///
/// す-verbs have no short form and use `させられない`. A bare `されない` key
/// would be the negative passive of す-verbs, and it is also a suffix of
/// every other key in this table, so it must not appear here. `たされない`
/// belongs to つ-verbs only; う- and る-verbs use `わされない` and `らされない`.
final LemmatizationRule godanVerbNegativeCausativePassive = LemmatizationRule(
  name: 'Godan verb - negative causative-passive form',
  pattern: AllomorphPattern(
    patterns: {
      'わされない': ['う'],
      'かされない': ['く'],
      'がされない': ['ぐ'],
      'させられない': ['す'],
      'たされない': ['つ'],
      'なされない': ['ぬ'],
      'ばされない': ['ぶ'],
      'まされない': ['む'],
      'らされない': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative imperative (dictionary form + な).
///
/// Keys are inflected endings; values are the dictionary-form endings they
/// stand for.
final LemmatizationRule godanVerbNegativeImperative = LemmatizationRule(
  name: 'Godan verb - negative imperative form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'うな': ['う'],
      'くな': ['く'],
      'ぐな': ['ぐ'],
      'すな': ['す'],
      'つな': ['つ'],
      'ぬな': ['ぬ'],
      'ぶな': ['ぶ'],
      'むな': ['む'],
      'るな': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, desire form (i-row + たい).
///
/// Includes the `いたい` row for う-verbs (買いたい), matching the `いたくない`
/// row already present in the negative desire rule.
final LemmatizationRule godanVerbDesire = LemmatizationRule(
  name: 'Godan verb - desire form',
  pattern: AllomorphPattern(
    patterns: {
      'いたい': ['う'],
      'きたい': ['く'],
      'ぎたい': ['ぐ'],
      'したい': ['す'],
      'ちたい': ['つ'],
      'にたい': ['ぬ'],
      'びたい': ['ぶ'],
      'みたい': ['む'],
      'りたい': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative desire form (-たくない).
///
/// Keys are inflected endings; values are the dictionary-form endings they
/// stand for.
final LemmatizationRule godanVerbNegativeDesire = LemmatizationRule(
  name: 'Godan verb - negative desire form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'いたくない': ['う'],
      'きたくない': ['く'],
      'ぎたくない': ['ぐ'],
      'したくない': ['す'],
      'ちたくない': ['つ'],
      'にたくない': ['ぬ'],
      'びたくない': ['ぶ'],
      'みたくない': ['む'],
      'りたくない': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// Godan verb, past desire form (i-row + たかった).
///
/// Includes the `いたかった` row for う-verbs (買いたかった), matching the
/// `いたくなかった` row already present in the negative past desire rule.
final LemmatizationRule godanVerbPastDesire = LemmatizationRule(
  name: 'Godan verb - past desire form',
  pattern: AllomorphPattern(
    patterns: {
      'いたかった': ['う'],
      'きたかった': ['く'],
      'ぎたかった': ['ぐ'],
      'したかった': ['す'],
      'ちたかった': ['つ'],
      'にたかった': ['ぬ'],
      'びたかった': ['ぶ'],
      'みたかった': ['む'],
      'りたかった': ['る'],
    },
    type: LemmatizationRuleType.suffix,
  ),
  validChildClasses: {WordClass.godanVerb},
  wordClass: WordClass.godanVerb,
);
|
||||||
|
|
||||||
|
/// Godan verb, negative past desire form (-たくなかった).
///
/// Keys are inflected endings; values are the dictionary-form endings they
/// stand for.
final LemmatizationRule godanVerbNegativePastDesire = LemmatizationRule(
  name: 'Godan verb - negative past desire form',
  wordClass: WordClass.godanVerb,
  validChildClasses: {WordClass.godanVerb},
  pattern: AllomorphPattern(
    type: LemmatizationRuleType.suffix,
    patterns: {
      'いたくなかった': ['う'],
      'きたくなかった': ['く'],
      'ぎたくなかった': ['ぐ'],
      'したくなかった': ['す'],
      'ちたくなかった': ['つ'],
      'にたくなかった': ['ぬ'],
      'びたくなかった': ['ぶ'],
      'みたくなかった': ['む'],
      'りたくなかった': ['る'],
    },
  ),
);
|
||||||
|
|
||||||
|
/// All godan verb rules as one read-only list, in declaration order.
final List<LemmatizationRule> godanVerbLemmatizationRules =
    List<LemmatizationRule>.unmodifiable(<LemmatizationRule>[
  // Affirmative forms.
  godanVerbBase,
  godanVerbNegative,
  godanVerbPast,
  godanVerbTe,
  godanVerbTeiru,
  godanVerbTeita,
  godanVerbConditional,
  godanVerbVolitional,
  godanVerbPotential,
  godanVerbPassive,
  godanVerbCausative,
  godanVerbCausativePassive,
  godanVerbImperative,
  // Negative forms.
  godanVerbNegativePast,
  godanVerbNegativeTe,
  godanVerbNegativeConditional,
  godanVerbNegativeVolitional,
  godanVerbNegativePotential,
  godanVerbNegativePassive,
  godanVerbNegativeCausative,
  godanVerbNegativeCausativePassive,
  godanVerbNegativeImperative,
  // Desire (-たい) forms.
  godanVerbDesire,
  godanVerbNegativeDesire,
  godanVerbPastDesire,
  godanVerbNegativePastDesire,
]);
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
|
||||||
|
|
||||||
List<LemmatizationRule> iAdjectiveLemmatizationRules = [
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - base form',
|
|
||||||
terminal: true,
|
|
||||||
pattern: 'い',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - negative form',
|
|
||||||
pattern: 'くない',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - past form',
|
|
||||||
pattern: 'かった',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - negative past form',
|
|
||||||
pattern: 'くなかった',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - te-form',
|
|
||||||
pattern: 'くて',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - conditional form',
|
|
||||||
pattern: 'ければ',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - volitional form',
|
|
||||||
pattern: 'かろう',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
LemmatizationRule.simple(
|
|
||||||
name: 'I adjective - continuative form',
|
|
||||||
pattern: 'く',
|
|
||||||
replacement: 'い',
|
|
||||||
validChildClasses: [WordClass.iAdjective],
|
|
||||||
wordClass: WordClass.iAdjective,
|
|
||||||
),
|
|
||||||
];
|
|
||||||
77
lib/util/lemmatizer/rules/i_adjectives.dart
Normal file
77
lib/util/lemmatizer/rules/i_adjectives.dart
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||||
|
|
||||||
|
/// Creates a non-terminal i-adjective rule pairing [pattern] with the
/// replacement 'い'.
///
/// Both the rule's own word class and its only valid child class are
/// [WordClass.iAdjective].
LemmatizationRule _iAdjectiveRule(String name, String pattern) =>
    LemmatizationRule.simple(
      name: name,
      pattern: pattern,
      replacement: 'い',
      validChildClasses: {WordClass.iAdjective},
      wordClass: WordClass.iAdjective,
    );

/// Dictionary form (…い). The only rule in this file flagged `terminal`.
final LemmatizationRule iAdjectiveBase = LemmatizationRule.simple(
  wordClass: WordClass.iAdjective,
  validChildClasses: {WordClass.iAdjective},
  terminal: true,
  name: 'I adjective - base form',
  pattern: 'い',
  replacement: 'い',
);

/// …くない → …い
final LemmatizationRule iAdjectiveNegative = _iAdjectiveRule(
  'I adjective - negative form',
  'くない',
);

/// …かった → …い
final LemmatizationRule iAdjectivePast = _iAdjectiveRule(
  'I adjective - past form',
  'かった',
);

/// …くなかった → …い
final LemmatizationRule iAdjectiveNegativePast = _iAdjectiveRule(
  'I adjective - negative past form',
  'くなかった',
);

/// …くて → …い
final LemmatizationRule iAdjectiveTe = _iAdjectiveRule(
  'I adjective - te-form',
  'くて',
);

/// …ければ → …い
final LemmatizationRule iAdjectiveConditional = _iAdjectiveRule(
  'I adjective - conditional form',
  'ければ',
);

/// …かろう → …い
final LemmatizationRule iAdjectiveVolitional = _iAdjectiveRule(
  'I adjective - volitional form',
  'かろう',
);

/// …く → …い
final LemmatizationRule iAdjectiveContinuative = _iAdjectiveRule(
  'I adjective - continuative form',
  'く',
);

/// Every i-adjective rule declared above, in declaration order, exposed as an
/// unmodifiable list.
final List<LemmatizationRule> iAdjectiveLemmatizationRules =
    List.unmodifiable(<LemmatizationRule>[
      iAdjectiveBase,
      iAdjectiveNegative,
      iAdjectivePast,
      iAdjectiveNegativePast,
      iAdjectiveTe,
      iAdjectiveConditional,
      iAdjectiveVolitional,
      iAdjectiveContinuative,
    ]);
|
||||||
@@ -1,241 +0,0 @@
|
|||||||
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
|
||||||
import 'package:jadb/util/text_filtering.dart';
|
|
||||||
|
|
||||||
/// Patterns handed to every ichidan rule as `lookAheadBehind`.
///
/// NOTE(review): presumably the character that must directly precede the
/// matched ending — confirm against `LemmatizationRule.simple`.
///
/// Ichidan stems end in an i-row or e-row kana (or in a kanji), so both rows
/// are listed in full. The previous list repeated 'び' where 'ぴ' belongs and
/// left out 'い', 'え' and 'ぺ', which excluded stems such as 見え-, 考え- and
/// 用い-.
List<Pattern> lookBehinds = [
  kanjiRegex,
  // i-row
  'い',
  'き',
  'ぎ',
  'し',
  'じ',
  'ち',
  'ぢ',
  'に',
  'ひ',
  'び',
  'ぴ',
  'み',
  'り',
  // e-row
  'え',
  'け',
  'げ',
  'せ',
  'ぜ',
  'て',
  'で',
  'ね',
  'へ',
  'べ',
  'ぺ',
  'め',
  'れ',
];
|
|
||||||
|
|
||||||
/// Creates a non-terminal ichidan rule pairing [pattern] with the dictionary
/// ending 'る', constrained by the shared [lookBehinds] patterns.
LemmatizationRule _ichidanRule(String name, String pattern) =>
    LemmatizationRule.simple(
      name: name,
      pattern: pattern,
      replacement: 'る',
      lookAheadBehind: lookBehinds,
      validChildClasses: [WordClass.ichidanVerb],
      wordClass: WordClass.ichidanVerb,
    );

/// Lemmatization rules for ichidan (一段) verbs.
///
/// Every rule maps an inflected ending back to 'る'. Only the base form is
/// flagged `terminal`. Some surface endings appear twice on purpose, under
/// different rule names (potential/passive both use 'られる').
List<LemmatizationRule> ichidanVerbLemmatizationRules = [
  LemmatizationRule.simple(
    name: 'Ichidan verb - base form',
    terminal: true,
    pattern: 'る',
    replacement: 'る',
    lookAheadBehind: lookBehinds,
    validChildClasses: [WordClass.ichidanVerb],
    wordClass: WordClass.ichidanVerb,
  ),
  _ichidanRule('Ichidan verb - negative form', 'ない'),
  _ichidanRule('Ichidan verb - past form', 'た'),
  _ichidanRule('Ichidan verb - te-form', 'て'),
  _ichidanRule('Ichidan verb - te-form with いる', 'ている'),
  _ichidanRule('Ichidan verb - te-form with いた', 'ていた'),
  _ichidanRule('Ichidan verb - conditional form', 'れば'),
  _ichidanRule('Ichidan verb - volitional form', 'よう'),
  _ichidanRule('Ichidan verb - potential form', 'られる'),
  _ichidanRule('Ichidan verb - passive form', 'られる'),
  _ichidanRule('Ichidan verb - causative form', 'させる'),
  _ichidanRule('Ichidan verb - causative passive form', 'させられる'),
  // The plain ichidan imperative ends in ろ (食べる → 食べろ). 'れ' is the
  // godan る-verb ending and never matched a standard ichidan imperative.
  _ichidanRule('Ichidan verb - imperative form', 'ろ'),
  _ichidanRule('Ichidan verb - negative past form', 'なかった'),
  _ichidanRule('Ichidan verb - negative te-form', 'なくて'),
  _ichidanRule('Ichidan verb - negative conditional form', 'なければ'),
  _ichidanRule('Ichidan verb - negative volitional form', 'なかろう'),
  _ichidanRule('Ichidan verb - negative potential form', 'られない'),
  _ichidanRule('Ichidan verb - negative passive form', 'られない'),
  _ichidanRule('Ichidan verb - negative causative form', 'させない'),
  _ichidanRule('Ichidan verb - negative causative passive form', 'させられない'),
  _ichidanRule('Ichidan verb - negative imperative form', 'るな'),
  _ichidanRule('Ichidan verb - desire form', 'たい'),
  _ichidanRule('Ichidan verb - negative desire form', 'たくない'),
  _ichidanRule('Ichidan verb - past desire form', 'たかった'),
  _ichidanRule('Ichidan verb - negative past desire form', 'たくなかった'),
];
|
|
||||||
331
lib/util/lemmatizer/rules/ichidan_verbs.dart
Normal file
331
lib/util/lemmatizer/rules/ichidan_verbs.dart
Normal file
@@ -0,0 +1,331 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||||
|
import 'package:jadb/util/text_filtering.dart';
|
||||||
|
|
||||||
|
/// Patterns handed to every ichidan rule in this file as `lookAheadBehind`.
///
/// NOTE(review): presumably the character that must directly precede the
/// matched ending — confirm against `LemmatizationRule.simple`.
///
/// Ichidan stems end in an i-row or e-row kana (or in a kanji), so both rows
/// are listed in full. The previous list repeated 'び' where 'ぴ' belongs and
/// left out 'い', 'え' and 'ぺ', which excluded stems such as 見え-, 考え- and
/// 用い-.
final List<Pattern> _lookBehinds = [
  kanjiRegex,
  // i-row
  'い',
  'き',
  'ぎ',
  'し',
  'じ',
  'ち',
  'ぢ',
  'に',
  'ひ',
  'び',
  'ぴ',
  'み',
  'り',
  // e-row
  'え',
  'け',
  'げ',
  'せ',
  'ぜ',
  'て',
  'で',
  'ね',
  'へ',
  'べ',
  'ぺ',
  'め',
  'れ',
];
|
||||||
|
|
||||||
|
/// Creates a non-terminal ichidan rule pairing [pattern] with the dictionary
/// ending 'る', constrained by the shared [_lookBehinds] patterns.
LemmatizationRule _ichidanRule(String name, String pattern) =>
    LemmatizationRule.simple(
      name: name,
      pattern: pattern,
      replacement: 'る',
      lookAheadBehind: _lookBehinds,
      validChildClasses: {WordClass.ichidanVerb},
      wordClass: WordClass.ichidanVerb,
    );

/// Dictionary form (…る). The only ichidan rule flagged `terminal`.
final LemmatizationRule ichidanVerbBase = LemmatizationRule.simple(
  name: 'Ichidan verb - base form',
  terminal: true,
  pattern: 'る',
  replacement: 'る',
  lookAheadBehind: _lookBehinds,
  validChildClasses: {WordClass.ichidanVerb},
  wordClass: WordClass.ichidanVerb,
);

final LemmatizationRule ichidanVerbNegative = _ichidanRule(
  'Ichidan verb - negative form',
  'ない',
);

final LemmatizationRule ichidanVerbPast = _ichidanRule(
  'Ichidan verb - past form',
  'た',
);

final LemmatizationRule ichidanVerbTe = _ichidanRule(
  'Ichidan verb - te-form',
  'て',
);

final LemmatizationRule ichidanVerbTeiru = _ichidanRule(
  'Ichidan verb - te-form with いる',
  'ている',
);

final LemmatizationRule ichidanVerbTeita = _ichidanRule(
  'Ichidan verb - te-form with いた',
  'ていた',
);

final LemmatizationRule ichidanVerbConditional = _ichidanRule(
  'Ichidan verb - conditional form',
  'れば',
);

final LemmatizationRule ichidanVerbVolitional = _ichidanRule(
  'Ichidan verb - volitional form',
  'よう',
);

/// Shares the surface ending 'られる' with [ichidanVerbPassive]; the two rules
/// differ only in name.
final LemmatizationRule ichidanVerbPotential = _ichidanRule(
  'Ichidan verb - potential form',
  'られる',
);

/// Shares the surface ending 'られる' with [ichidanVerbPotential].
final LemmatizationRule ichidanVerbPassive = _ichidanRule(
  'Ichidan verb - passive form',
  'られる',
);

final LemmatizationRule ichidanVerbCausative = _ichidanRule(
  'Ichidan verb - causative form',
  'させる',
);

final LemmatizationRule ichidanVerbCausativePassive = _ichidanRule(
  'Ichidan verb - causative passive form',
  'させられる',
);

/// Plain imperative, …ろ (食べる → 食べろ).
///
/// The pattern used to be 'れ', which is the godan る-verb imperative ending
/// and never matched a standard ichidan imperative.
final LemmatizationRule ichidanVerbImperative = _ichidanRule(
  'Ichidan verb - imperative form',
  'ろ',
);

final LemmatizationRule ichidanVerbNegativePast = _ichidanRule(
  'Ichidan verb - negative past form',
  'なかった',
);

final LemmatizationRule ichidanVerbNegativeTe = _ichidanRule(
  'Ichidan verb - negative te-form',
  'なくて',
);

final LemmatizationRule ichidanVerbNegativeConditional = _ichidanRule(
  'Ichidan verb - negative conditional form',
  'なければ',
);

/// Informal variant of the negative conditional: …なきゃ.
final LemmatizationRule ichidanVerbNegativeConditionalVariant1 = _ichidanRule(
  'Ichidan verb - negative conditional form (informal variant)',
  'なきゃ',
);

/// Informal variant of the negative conditional: …なくちゃ.
final LemmatizationRule ichidanVerbNegativeConditionalVariant2 = _ichidanRule(
  'Ichidan verb - negative conditional form (informal variant)',
  'なくちゃ',
);

/// Informal variant of the negative conditional: …ないと.
final LemmatizationRule ichidanVerbNegativeConditionalVariant3 = _ichidanRule(
  'Ichidan verb - negative conditional form (informal variant)',
  'ないと',
);

final LemmatizationRule ichidanVerbNegativeVolitional = _ichidanRule(
  'Ichidan verb - negative volitional form',
  'なかろう',
);

/// Shares the surface ending 'られない' with [ichidanVerbNegativePassive].
final LemmatizationRule ichidanVerbNegativePotential = _ichidanRule(
  'Ichidan verb - negative potential form',
  'られない',
);

/// Shares the surface ending 'られない' with [ichidanVerbNegativePotential].
final LemmatizationRule ichidanVerbNegativePassive = _ichidanRule(
  'Ichidan verb - negative passive form',
  'られない',
);

final LemmatizationRule ichidanVerbNegativeCausative = _ichidanRule(
  'Ichidan verb - negative causative form',
  'させない',
);

final LemmatizationRule ichidanVerbNegativeCausativePassive = _ichidanRule(
  'Ichidan verb - negative causative passive form',
  'させられない',
);

final LemmatizationRule ichidanVerbNegativeImperative = _ichidanRule(
  'Ichidan verb - negative imperative form',
  'るな',
);

final LemmatizationRule ichidanVerbDesire = _ichidanRule(
  'Ichidan verb - desire form',
  'たい',
);

final LemmatizationRule ichidanVerbNegativeDesire = _ichidanRule(
  'Ichidan verb - negative desire form',
  'たくない',
);

final LemmatizationRule ichidanVerbPastDesire = _ichidanRule(
  'Ichidan verb - past desire form',
  'たかった',
);

final LemmatizationRule ichidanVerbNegativePastDesire = _ichidanRule(
  'Ichidan verb - negative past desire form',
  'たくなかった',
);

/// Every ichidan rule declared above, in declaration order, exposed as an
/// unmodifiable list.
final List<LemmatizationRule> ichidanVerbLemmatizationRules =
    List.unmodifiable(<LemmatizationRule>[
      ichidanVerbBase,
      ichidanVerbNegative,
      ichidanVerbPast,
      ichidanVerbTe,
      ichidanVerbTeiru,
      ichidanVerbTeita,
      ichidanVerbConditional,
      ichidanVerbVolitional,
      ichidanVerbPotential,
      ichidanVerbPassive,
      ichidanVerbCausative,
      ichidanVerbCausativePassive,
      ichidanVerbImperative,
      ichidanVerbNegativePast,
      ichidanVerbNegativeTe,
      ichidanVerbNegativeConditional,
      ichidanVerbNegativeConditionalVariant1,
      ichidanVerbNegativeConditionalVariant2,
      ichidanVerbNegativeConditionalVariant3,
      ichidanVerbNegativeVolitional,
      ichidanVerbNegativePotential,
      ichidanVerbNegativePassive,
      ichidanVerbNegativeCausative,
      ichidanVerbNegativeCausativePassive,
      ichidanVerbNegativeImperative,
      ichidanVerbDesire,
      ichidanVerbNegativeDesire,
      ichidanVerbPastDesire,
      ichidanVerbNegativePastDesire,
    ]);
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb
|
// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb
|
||||||
|
|
||||||
const hiragana_syllabic_n = 'ん';
|
const hiraganaSyllabicN = 'ん';
|
||||||
const hiragana_small_tsu = 'っ';
|
const hiraganaSmallTsu = 'っ';
|
||||||
|
|
||||||
const Map<String, String> hiragana_to_latin = {
|
const Map<String, String> hiraganaToLatin = {
|
||||||
'あ': 'a',
|
'あ': 'a',
|
||||||
'い': 'i',
|
'い': 'i',
|
||||||
'う': 'u',
|
'う': 'u',
|
||||||
@@ -209,7 +209,7 @@ const Map<String, String> hiragana_to_latin = {
|
|||||||
'ゟ': 'yori',
|
'ゟ': 'yori',
|
||||||
};
|
};
|
||||||
|
|
||||||
const Map<String, String> latin_to_hiragana = {
|
const Map<String, String> latinToHiragana = {
|
||||||
'a': 'あ',
|
'a': 'あ',
|
||||||
'i': 'い',
|
'i': 'い',
|
||||||
'u': 'う',
|
'u': 'う',
|
||||||
@@ -481,12 +481,13 @@ const Map<String, String> latin_to_hiragana = {
|
|||||||
'#~': '〜',
|
'#~': '〜',
|
||||||
};
|
};
|
||||||
|
|
||||||
bool _smallTsu(String forConversion) => forConversion == hiragana_small_tsu;
|
bool _smallTsu(String forConversion) => forConversion == hiraganaSmallTsu;
|
||||||
bool _nFollowedByYuYeYo(String forConversion, String kana) =>
|
bool _nFollowedByYuYeYo(String forConversion, String kana) =>
|
||||||
forConversion == hiragana_syllabic_n &&
|
forConversion == hiraganaSyllabicN &&
|
||||||
kana.length > 1 &&
|
kana.length > 1 &&
|
||||||
'やゆよ'.contains(kana.substring(1, 2));
|
'やゆよ'.contains(kana.substring(1, 2));
|
||||||
|
|
||||||
|
/// Transliterates a string of hiragana characters to Latin script (romaji).
|
||||||
String transliterateHiraganaToLatin(String hiragana) {
|
String transliterateHiraganaToLatin(String hiragana) {
|
||||||
String kana = hiragana;
|
String kana = hiragana;
|
||||||
String romaji = '';
|
String romaji = '';
|
||||||
@@ -505,7 +506,7 @@ String transliterateHiraganaToLatin(String hiragana) {
|
|||||||
} else if (_nFollowedByYuYeYo(forConversion, kana)) {
|
} else if (_nFollowedByYuYeYo(forConversion, kana)) {
|
||||||
mora = "n'";
|
mora = "n'";
|
||||||
}
|
}
|
||||||
mora ??= hiragana_to_latin[forConversion];
|
mora ??= hiraganaToLatin[forConversion];
|
||||||
|
|
||||||
if (mora != null) {
|
if (mora != null) {
|
||||||
if (geminate) {
|
if (geminate) {
|
||||||
@@ -524,15 +525,61 @@ String transliterateHiraganaToLatin(String hiragana) {
|
|||||||
return romaji;
|
return romaji;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Maps each converted unit of [hiragana] to the position where its romaji
/// begins.
///
/// Returns one `(inputIndex, outputIndex)` record per converted mora and per
/// character passed through unchanged. `inputIndex` is the offset of that
/// unit in [hiragana]; `outputIndex` is its offset in the romaji string built
/// here (presumably the same text `transliterateHiraganaToLatin` returns —
/// confirm).
///
/// A small っ emits no record of its own. It sets a flag so that the first
/// letter of the next mora is doubled; the doubled letter is appended before
/// that mora's record is taken, so the record points at the mora itself.
List<(int, int)> transliterateHiraganaToLatinSpan(String hiragana) {
  String kana = hiragana;
  String romaji = '';
  final List<(int, int)> spans = [];
  bool geminate = false;
  int kanaIndex = 0;

  while (kana.isNotEmpty) {
    // Try the two-character digraph first, then a single character.
    final lengths = [if (kana.length > 1) 2, 1];
    for (final length in lengths) {
      final String forConversion = kana.substring(0, length);
      String? mora;

      if (_smallTsu(forConversion)) {
        geminate = true;
        kana = kana.replaceRange(0, length, '');
        // The っ is consumed from the input, so the input offset must move
        // with it. Without this, every later record is one position short.
        kanaIndex += length;
        break;
      } else if (_nFollowedByYuYeYo(forConversion, kana)) {
        mora = "n'";
      }
      mora ??= hiraganaToLatin[forConversion];

      if (mora != null) {
        if (geminate) {
          geminate = false;
          romaji += mora.substring(0, 1);
        }
        spans.add((kanaIndex, romaji.length));
        romaji += mora;
        kana = kana.replaceRange(0, length, '');
        kanaIndex += length;
        break;
      } else if (length == 1) {
        // No table entry: copy the character through unchanged.
        spans.add((kanaIndex, romaji.length));
        romaji += forConversion;
        kana = kana.replaceRange(0, length, '');
        kanaIndex += length;
      }
    }
  }
  return spans;
}
|
||||||
|
|
||||||
bool _doubleNFollowedByAIUEO(String forConversion) =>
|
bool _doubleNFollowedByAIUEO(String forConversion) =>
|
||||||
RegExp(r'^nn[aiueo]$').hasMatch(forConversion);
|
RegExp(r'^nn[aiueo]$').hasMatch(forConversion);
|
||||||
bool _hasTableMatch(String forConversion) =>
|
bool _hasTableMatch(String forConversion) =>
|
||||||
latin_to_hiragana[forConversion] != null;
|
latinToHiragana[forConversion] != null;
|
||||||
bool _hasDoubleConsonant(String forConversion, int length) =>
|
bool _hasDoubleConsonant(String forConversion, int length) =>
|
||||||
forConversion == 'tch' ||
|
forConversion == 'tch' ||
|
||||||
(length == 2 &&
|
(length == 2 &&
|
||||||
RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(forConversion));
|
RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(forConversion));
|
||||||
|
|
||||||
|
/// Transliterates a string of Latin script (romaji) to hiragana characters.
|
||||||
String transliterateLatinToHiragana(String latin) {
|
String transliterateLatinToHiragana(String latin) {
|
||||||
String romaji = latin
|
String romaji = latin
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
@@ -549,12 +596,12 @@ String transliterateLatinToHiragana(String latin) {
|
|||||||
final String forConversion = romaji.substring(0, length);
|
final String forConversion = romaji.substring(0, length);
|
||||||
|
|
||||||
if (_doubleNFollowedByAIUEO(forConversion)) {
|
if (_doubleNFollowedByAIUEO(forConversion)) {
|
||||||
mora = hiragana_syllabic_n;
|
mora = hiraganaSyllabicN;
|
||||||
forRemoval = 1;
|
forRemoval = 1;
|
||||||
} else if (_hasTableMatch(forConversion)) {
|
} else if (_hasTableMatch(forConversion)) {
|
||||||
mora = latin_to_hiragana[forConversion];
|
mora = latinToHiragana[forConversion];
|
||||||
} else if (_hasDoubleConsonant(forConversion, length)) {
|
} else if (_hasDoubleConsonant(forConversion, length)) {
|
||||||
mora = hiragana_small_tsu;
|
mora = hiraganaSmallTsu;
|
||||||
forRemoval = 1;
|
forRemoval = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -572,6 +619,53 @@ String transliterateLatinToHiragana(String latin) {
|
|||||||
return kana;
|
return kana;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Maps each converted chunk of [latin] to the position where its kana
/// begins.
///
/// Returns one `(inputIndex, outputIndex)` record per emitted kana unit and
/// per character copied through unchanged. `inputIndex` counts characters of
/// the normalised input (lower-cased, with 'mb'/'mp' rewritten to 'nb'/'np');
/// `outputIndex` is the offset in the kana string built here.
List<(int, int)> transliterateLatinToHiraganaSpan(String latin) {
  var remaining = latin
      .toLowerCase()
      .replaceAll('mb', 'nb')
      .replaceAll('mp', 'np');
  var output = '';
  var consumedSoFar = 0;
  final result = <(int, int)>[];

  while (remaining.isNotEmpty) {
    // Longest candidate first: up to three characters, down to one.
    final longest = remaining.length < 3 ? remaining.length : 3;

    for (var width = longest; width >= 1; width--) {
      final chunk = remaining.substring(0, width);
      String? kanaUnit;
      var step = width;

      if (_doubleNFollowedByAIUEO(chunk)) {
        // "nn" + vowel: emit ん for the first n only.
        kanaUnit = hiraganaSyllabicN;
        step = 1;
      } else if (_hasTableMatch(chunk)) {
        kanaUnit = latinToHiragana[chunk];
      } else if (_hasDoubleConsonant(chunk, width)) {
        // Doubled consonant: emit っ for the first letter only.
        kanaUnit = hiraganaSmallTsu;
        step = 1;
      }

      // Nothing matched at this width and a shorter one remains: retry.
      if (kanaUnit == null && width > 1) {
        continue;
      }

      // Either a kana unit was found, or this is a single unmatched character
      // that is copied through as-is (step is 1 in that case).
      result.add((consumedSoFar, output.length));
      output += kanaUnit ?? chunk;
      remaining = remaining.substring(step);
      consumedSoFar += step;
      break;
    }
  }

  return result;
}
|
||||||
|
|
||||||
String _transposeCodepointsInRange(
|
String _transposeCodepointsInRange(
|
||||||
String text,
|
String text,
|
||||||
int distance,
|
int distance,
|
||||||
@@ -583,15 +677,19 @@ String _transposeCodepointsInRange(
|
|||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/// Transliterates a string of kana characters (hiragana or katakana) to Latin script (romaji).
|
||||||
String transliterateKanaToLatin(String kana) =>
|
String transliterateKanaToLatin(String kana) =>
|
||||||
transliterateHiraganaToLatin(transliterateKatakanaToHiragana(kana));
|
transliterateHiraganaToLatin(transliterateKatakanaToHiragana(kana));
|
||||||
|
|
||||||
|
/// Transliterates a string of Latin script (romaji) to katakana characters.
|
||||||
String transliterateLatinToKatakana(String latin) =>
|
String transliterateLatinToKatakana(String latin) =>
|
||||||
transliterateHiraganaToKatakana(transliterateLatinToHiragana(latin));
|
transliterateHiraganaToKatakana(transliterateLatinToHiragana(latin));
|
||||||
|
|
||||||
|
/// Transliterates a string of katakana characters to hiragana characters.
|
||||||
String transliterateKatakanaToHiragana(String katakana) =>
|
String transliterateKatakanaToHiragana(String katakana) =>
|
||||||
_transposeCodepointsInRange(katakana, -96, 12449, 12534);
|
_transposeCodepointsInRange(katakana, -96, 12449, 12534);
|
||||||
|
|
||||||
|
/// Transliterates a string of hiragana characters to katakana characters.
|
||||||
String transliterateHiraganaToKatakana(String hiragana) =>
|
String transliterateHiraganaToKatakana(String hiragana) =>
|
||||||
_transposeCodepointsInRange(hiragana, 96, 12353, 12438);
|
_transposeCodepointsInRange(hiragana, 96, 12353, 12438);
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,16 @@
|
|||||||
|
CREATE TABLE "JMdict_Version" (
|
||||||
|
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||||
|
"date" DATE NOT NULL,
|
||||||
|
"hash" VARCHAR(64) NOT NULL
|
||||||
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
|
CREATE TRIGGER "JMdict_Version_SingleRow"
|
||||||
|
BEFORE INSERT ON "JMdict_Version"
|
||||||
|
WHEN (SELECT COUNT(*) FROM "JMdict_Version") >= 1
|
||||||
|
BEGIN
|
||||||
|
SELECT RAISE(FAIL, 'Only one row allowed in JMdict_Version');
|
||||||
|
END;
|
||||||
|
|
||||||
CREATE TABLE "JMdict_InfoDialect" (
|
CREATE TABLE "JMdict_InfoDialect" (
|
||||||
"id" VARCHAR(4) PRIMARY KEY NOT NULL,
|
"id" VARCHAR(4) PRIMARY KEY NOT NULL,
|
||||||
"description" TEXT NOT NULL
|
"description" TEXT NOT NULL
|
||||||
|
|||||||
@@ -1,3 +1,16 @@
|
|||||||
|
CREATE TABLE "JMdict_JLPT_Version" (
|
||||||
|
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||||
|
"date" DATE NOT NULL,
|
||||||
|
"hash" VARCHAR(64) NOT NULL
|
||||||
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
|
CREATE TRIGGER "JMdict_JLPT_Version_SingleRow"
|
||||||
|
BEFORE INSERT ON "JMdict_JLPT_Version"
|
||||||
|
WHEN (SELECT COUNT(*) FROM "JMdict_JLPT_Version") >= 1
|
||||||
|
BEGIN
|
||||||
|
SELECT RAISE(FAIL, 'Only one row allowed in JMdict_JLPT_Version');
|
||||||
|
END;
|
||||||
|
|
||||||
CREATE TABLE "JMdict_JLPTTag" (
|
CREATE TABLE "JMdict_JLPTTag" (
|
||||||
"entryId" INTEGER NOT NULL,
|
"entryId" INTEGER NOT NULL,
|
||||||
"jlptLevel" CHAR(2) NOT NULL CHECK ("jlptLevel" in ('N5', 'N4', 'N3', 'N2', 'N1')),
|
"jlptLevel" CHAR(2) NOT NULL CHECK ("jlptLevel" in ('N5', 'N4', 'N3', 'N2', 'N1')),
|
||||||
|
|||||||
@@ -1,3 +1,16 @@
|
|||||||
|
CREATE TABLE "RADKFILE_Version" (
|
||||||
|
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||||
|
"date" DATE NOT NULL,
|
||||||
|
"hash" VARCHAR(64) NOT NULL
|
||||||
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
|
CREATE TRIGGER "RADKFILE_Version_SingleRow"
|
||||||
|
BEFORE INSERT ON "RADKFILE_Version"
|
||||||
|
WHEN (SELECT COUNT(*) FROM "RADKFILE_Version") >= 1
|
||||||
|
BEGIN
|
||||||
|
SELECT RAISE(FAIL, 'Only one row allowed in RADKFILE_Version');
|
||||||
|
END;
|
||||||
|
|
||||||
CREATE TABLE "RADKFILE" (
|
CREATE TABLE "RADKFILE" (
|
||||||
"kanji" CHAR(1) NOT NULL,
|
"kanji" CHAR(1) NOT NULL,
|
||||||
"radical" CHAR(1) NOT NULL,
|
"radical" CHAR(1) NOT NULL,
|
||||||
|
|||||||
@@ -1,3 +1,16 @@
|
|||||||
|
CREATE TABLE "KANJIDIC_Version" (
|
||||||
|
"version" VARCHAR(10) PRIMARY KEY NOT NULL,
|
||||||
|
"date" DATE NOT NULL,
|
||||||
|
"hash" VARCHAR(64) NOT NULL
|
||||||
|
) WITHOUT ROWID;
|
||||||
|
|
||||||
|
CREATE TRIGGER "KANJIDIC_Version_SingleRow"
|
||||||
|
BEFORE INSERT ON "KANJIDIC_Version"
|
||||||
|
WHEN (SELECT COUNT(*) FROM "KANJIDIC_Version") >= 1
|
||||||
|
BEGIN
|
||||||
|
SELECT RAISE(FAIL, 'Only one row allowed in KANJIDIC_Version');
|
||||||
|
END;
|
||||||
|
|
||||||
CREATE TABLE "KANJIDIC_Character" (
|
CREATE TABLE "KANJIDIC_Character" (
|
||||||
"literal" CHAR(1) NOT NULL PRIMARY KEY,
|
"literal" CHAR(1) NOT NULL PRIMARY KEY,
|
||||||
"grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10),
|
"grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10),
|
||||||
|
|||||||
@@ -33,6 +33,14 @@ packages:
|
|||||||
url: "https://pub.dev"
|
url: "https://pub.dev"
|
||||||
source: hosted
|
source: hosted
|
||||||
version: "2.13.0"
|
version: "2.13.0"
|
||||||
|
benchmark_harness:
|
||||||
|
dependency: "direct dev"
|
||||||
|
description:
|
||||||
|
name: benchmark_harness
|
||||||
|
sha256: a2d3c4c83cac0126bf38e41eaf7bd9ed4f6635f1ee1a0cbc6f79fa9736c62cbd
|
||||||
|
url: "https://pub.dev"
|
||||||
|
source: hosted
|
||||||
|
version: "2.4.0"
|
||||||
boolean_selector:
|
boolean_selector:
|
||||||
dependency: transitive
|
dependency: transitive
|
||||||
description:
|
description:
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ dependencies:
|
|||||||
xml: ^6.5.0
|
xml: ^6.5.0
|
||||||
|
|
||||||
dev_dependencies:
|
dev_dependencies:
|
||||||
|
benchmark_harness: ^2.4.0
|
||||||
lints: ^6.0.0
|
lints: ^6.0.0
|
||||||
test: ^1.25.15
|
test: ^1.25.15
|
||||||
|
|
||||||
|
|||||||
21
test/const_data/kanji_grades.dart
Normal file
21
test/const_data/kanji_grades.dart
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import 'package:collection/collection.dart';
|
||||||
|
import 'package:jadb/const_data/kanji_grades.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
test('All constant kanji in jouyouKanjiByGrades are 2136 in total', () {
|
||||||
|
expect(jouyouKanjiByGrades.values.flattenedToSet.length, 2136);
|
||||||
|
});
|
||||||
|
|
||||||
|
// test('All constant kanji in jouyouKanjiByGrades are present in KANJIDIC2', () {
|
||||||
|
|
||||||
|
// });
|
||||||
|
|
||||||
|
// test('All constant kanji in jouyouKanjiByGrades have matching grade as in KANJIDIC2', () {
|
||||||
|
|
||||||
|
// });
|
||||||
|
|
||||||
|
// test('All constant kanji in jouyouKanjiByGradesAndStrokeCount have matching stroke count as in KANJIDIC2', () {
|
||||||
|
|
||||||
|
// });
|
||||||
|
}
|
||||||
17
test/const_data/radicals_test.dart
Normal file
17
test/const_data/radicals_test.dart
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
import 'package:collection/collection.dart';
|
||||||
|
import 'package:jadb/const_data/radicals.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
test('All constant radicals are 253 in total', () {
|
||||||
|
expect(radicals.values.flattenedToSet.length, 253);
|
||||||
|
});
|
||||||
|
|
||||||
|
// test('All constant radicals have at least 1 associated kanji in KANJIDIC2', () {
|
||||||
|
|
||||||
|
// });
|
||||||
|
|
||||||
|
// test('All constant radicals match the stroke order listed in KANJIDIC2', () {
|
||||||
|
|
||||||
|
// });
|
||||||
|
}
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
import 'package:collection/collection.dart';
|
|
||||||
import 'package:jadb/const_data/kanji_grades.dart';
|
|
||||||
import 'package:test/test.dart';
|
|
||||||
|
|
||||||
void main() {
|
|
||||||
test('Assert 2136 kanji in jouyou set', () {
|
|
||||||
expect(JOUYOU_KANJI_BY_GRADES.values.flattenedToSet.length, 2136);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
@@ -1,21 +1,20 @@
|
|||||||
import 'dart:ffi';
|
|
||||||
import 'dart:io';
|
|
||||||
|
|
||||||
import 'package:jadb/models/create_empty_db.dart';
|
import 'package:jadb/models/create_empty_db.dart';
|
||||||
import 'package:jadb/search.dart';
|
import 'package:jadb/search.dart';
|
||||||
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
|
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
|
||||||
// import 'package:sqlite3/open.dart';
|
// import 'package:sqlite3/open.dart';
|
||||||
import 'package:test/test.dart';
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
Future<DatabaseExecutor> setup_inmemory_database() async {
|
Future<DatabaseExecutor> setupInMemoryDatabase() async {
|
||||||
final dbConnection = await createDatabaseFactoryFfi().openDatabase(':memory:');
|
final dbConnection = await createDatabaseFactoryFfi().openDatabase(
|
||||||
|
':memory:',
|
||||||
|
);
|
||||||
|
|
||||||
return dbConnection;
|
return dbConnection;
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
test('Create empty db', () async {
|
test('Create empty db', () async {
|
||||||
final db = await setup_inmemory_database();
|
final db = await setupInMemoryDatabase();
|
||||||
|
|
||||||
await createEmptyDb(db);
|
await createEmptyDb(db);
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import 'setup_database_connection.dart';
|
|||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
test('Filter kanji', () async {
|
test('Filter kanji', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
|
|
||||||
final result = await connection.filterKanji([
|
final result = await connection.filterKanji([
|
||||||
'a',
|
'a',
|
||||||
@@ -26,4 +26,27 @@ void main() {
|
|||||||
|
|
||||||
expect(result.join(), '漢字地字');
|
expect(result.join(), '漢字地字');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('Filter kanji - deduplicate', () async {
|
||||||
|
final connection = await setupDatabaseConnection();
|
||||||
|
|
||||||
|
final result = await connection.filterKanji([
|
||||||
|
'a',
|
||||||
|
'b',
|
||||||
|
'c',
|
||||||
|
'漢',
|
||||||
|
'字',
|
||||||
|
'地',
|
||||||
|
'字',
|
||||||
|
'か',
|
||||||
|
'な',
|
||||||
|
'.',
|
||||||
|
'!',
|
||||||
|
'@',
|
||||||
|
';',
|
||||||
|
'々',
|
||||||
|
], deduplicate: true);
|
||||||
|
|
||||||
|
expect(result.join(), '漢字地');
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,16 +6,16 @@ import 'setup_database_connection.dart';
|
|||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
test('Search a kanji', () async {
|
test('Search a kanji', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
|
|
||||||
final result = await connection.jadbSearchKanji('漢');
|
final result = await connection.jadbSearchKanji('漢');
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
|
|
||||||
group('Search all jouyou kanji', () {
|
group('Search all jouyou kanji', () {
|
||||||
JOUYOU_KANJI_BY_GRADES.forEach((grade, characters) {
|
jouyouKanjiByGrades.forEach((grade, characters) {
|
||||||
test('Search all kanji in grade $grade', () async {
|
test('Search all kanji in grade $grade', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
|
|
||||||
for (final character in characters) {
|
for (final character in characters) {
|
||||||
final result = await connection.jadbSearchKanji(character);
|
final result = await connection.jadbSearchKanji(character);
|
||||||
|
|||||||
@@ -191,4 +191,67 @@ void main() {
|
|||||||
),
|
),
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('Infer match with no matches', () {
|
||||||
|
final wordSearchResult = WordSearchResult(
|
||||||
|
entryId: 0,
|
||||||
|
score: 0,
|
||||||
|
isCommon: false,
|
||||||
|
jlptLevel: JlptLevel.none,
|
||||||
|
kanjiInfo: {},
|
||||||
|
readingInfo: {},
|
||||||
|
japanese: [WordSearchRuby(base: '仮名', furigana: 'かな')],
|
||||||
|
senses: [
|
||||||
|
WordSearchSense(
|
||||||
|
antonyms: [],
|
||||||
|
dialects: [],
|
||||||
|
englishDefinitions: ['kana'],
|
||||||
|
fields: [],
|
||||||
|
info: [],
|
||||||
|
languageSource: [],
|
||||||
|
misc: [],
|
||||||
|
partsOfSpeech: [],
|
||||||
|
restrictedToKanji: [],
|
||||||
|
restrictedToReading: [],
|
||||||
|
seeAlso: [],
|
||||||
|
),
|
||||||
|
],
|
||||||
|
sources: WordSearchSources.empty(),
|
||||||
|
);
|
||||||
|
|
||||||
|
wordSearchResult.inferMatchSpans('xyz');
|
||||||
|
|
||||||
|
expect(wordSearchResult.matchSpans, isEmpty);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('Infer multiple matches of same substring', () {
|
||||||
|
final wordSearchResult = WordSearchResult(
|
||||||
|
entryId: 0,
|
||||||
|
score: 0,
|
||||||
|
isCommon: false,
|
||||||
|
jlptLevel: JlptLevel.none,
|
||||||
|
kanjiInfo: {},
|
||||||
|
readingInfo: {},
|
||||||
|
japanese: [WordSearchRuby(base: 'ああ')],
|
||||||
|
senses: [],
|
||||||
|
sources: WordSearchSources.empty(),
|
||||||
|
);
|
||||||
|
|
||||||
|
wordSearchResult.inferMatchSpans('あ');
|
||||||
|
|
||||||
|
expect(wordSearchResult.matchSpans, [
|
||||||
|
WordSearchMatchSpan(
|
||||||
|
spanType: WordSearchMatchSpanType.kanji,
|
||||||
|
start: 0,
|
||||||
|
end: 1,
|
||||||
|
index: 0,
|
||||||
|
),
|
||||||
|
WordSearchMatchSpan(
|
||||||
|
spanType: WordSearchMatchSpanType.kanji,
|
||||||
|
start: 1,
|
||||||
|
end: 2,
|
||||||
|
index: 0,
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import 'dart:io';
|
|||||||
import 'package:jadb/_data_ingestion/open_local_db.dart';
|
import 'package:jadb/_data_ingestion/open_local_db.dart';
|
||||||
import 'package:sqflite_common/sqlite_api.dart';
|
import 'package:sqflite_common/sqlite_api.dart';
|
||||||
|
|
||||||
Future<Database> setup_database_connection() async {
|
Future<Database> setupDatabaseConnection() async {
|
||||||
final libSqlitePath = Platform.environment['LIBSQLITE_PATH'];
|
final libSqlitePath = Platform.environment['LIBSQLITE_PATH'];
|
||||||
final jadbPath = Platform.environment['JADB_PATH'];
|
final jadbPath = Platform.environment['JADB_PATH'];
|
||||||
|
|
||||||
|
|||||||
@@ -5,43 +5,43 @@ import 'setup_database_connection.dart';
|
|||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
test('Search a word - english - auto', () async {
|
test('Search a word - english - auto', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
final result = await connection.jadbSearchWord('kana');
|
final result = await connection.jadbSearchWord('kana');
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Get word search count - english - auto', () async {
|
test('Get word search count - english - auto', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
final result = await connection.jadbSearchWordCount('kana');
|
final result = await connection.jadbSearchWordCount('kana');
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Search a word - japanese kana - auto', () async {
|
test('Search a word - japanese kana - auto', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
final result = await connection.jadbSearchWord('かな');
|
final result = await connection.jadbSearchWord('かな');
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Get word search count - japanese kana - auto', () async {
|
test('Get word search count - japanese kana - auto', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
final result = await connection.jadbSearchWordCount('かな');
|
final result = await connection.jadbSearchWordCount('かな');
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Search a word - japanese kanji - auto', () async {
|
test('Search a word - japanese kanji - auto', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
final result = await connection.jadbSearchWord('仮名');
|
final result = await connection.jadbSearchWord('仮名');
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Get word search count - japanese kanji - auto', () async {
|
test('Get word search count - japanese kanji - auto', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
final result = await connection.jadbSearchWordCount('仮名');
|
final result = await connection.jadbSearchWordCount('仮名');
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Get a word by id', () async {
|
test('Get a word by id', () async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
final result = await connection.jadbGetWordById(1577090);
|
final result = await connection.jadbGetWordById(1577090);
|
||||||
expect(result, isNotNull);
|
expect(result, isNotNull);
|
||||||
});
|
});
|
||||||
@@ -49,7 +49,7 @@ void main() {
|
|||||||
test(
|
test(
|
||||||
'Serialize all words',
|
'Serialize all words',
|
||||||
() async {
|
() async {
|
||||||
final connection = await setup_database_connection();
|
final connection = await setupDatabaseConnection();
|
||||||
|
|
||||||
// Test serializing all words
|
// Test serializing all words
|
||||||
for (final letter in 'aiueoksthnmyrw'.split('')) {
|
for (final letter in 'aiueoksthnmyrw'.split('')) {
|
||||||
|
|||||||
51
test/util/lemmatizer/lemmatizer_test.dart
Normal file
51
test/util/lemmatizer/lemmatizer_test.dart
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||||
|
import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
|
||||||
|
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
const List<String> ichidanVerbs = [
|
||||||
|
'食べる',
|
||||||
|
'食べた',
|
||||||
|
'食べさせられた',
|
||||||
|
'食べたい',
|
||||||
|
'食べたくない',
|
||||||
|
'食べたくなかった',
|
||||||
|
];
|
||||||
|
const List<String> godanVerbs = [
|
||||||
|
'泳ぐ',
|
||||||
|
'泳いだ',
|
||||||
|
'泳げる',
|
||||||
|
// '泳げれた',
|
||||||
|
];
|
||||||
|
|
||||||
|
bool findRuleRecursively(Lemmatized result, LemmatizationRule expectedRule) {
|
||||||
|
if (result.rule == expectedRule) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (final c in result.children) {
|
||||||
|
if (findRuleRecursively(c, expectedRule)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
group('Lemmatize Ichidan Verbs', () {
|
||||||
|
for (final v in ichidanVerbs) {
|
||||||
|
test('Lemmatize Ichidan Verb $v', () {
|
||||||
|
expect(findRuleRecursively(lemmatize(v), ichidanVerbBase), true);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
group('Lemmatize Godan Verbs', () {
|
||||||
|
for (final v in godanVerbs) {
|
||||||
|
test('Lemmatize Godan Verb $v', () {
|
||||||
|
expect(findRuleRecursively(lemmatize(v), godanVerbBase), true);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
14
test/util/lemmatizer/rules/godan_verbs_test.dart
Normal file
14
test/util/lemmatizer/rules/godan_verbs_test.dart
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/rules/godan_verbs.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
test('Test Godan Verb Base Rule', () {
|
||||||
|
expect(godanVerbBase.matches('泳ぐ'), true);
|
||||||
|
expect(godanVerbBase.apply('泳ぐ'), ['泳ぐ']);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('Test Godan Verb Negative Rule', () {
|
||||||
|
expect(godanVerbNegative.matches('泳がない'), true);
|
||||||
|
expect(godanVerbNegative.apply('泳がない'), ['泳ぐ']);
|
||||||
|
});
|
||||||
|
}
|
||||||
15
test/util/lemmatizer/rules/i_adjectives_test.dart
Normal file
15
test/util/lemmatizer/rules/i_adjectives_test.dart
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/rules/i_adjectives.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
test('Test i-adjective Base Rule', () {
|
||||||
|
expect(iAdjectiveBase.matches('怪しい'), true);
|
||||||
|
expect(iAdjectiveBase.apply('怪しい'), ['怪しい']);
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
test('Test i-adjective Negative Rule', () {
|
||||||
|
expect(iAdjectiveNegative.matches('怪しくない'), true);
|
||||||
|
expect(iAdjectiveNegative.apply('怪しくない'), ['怪しい']);
|
||||||
|
});
|
||||||
|
}
|
||||||
14
test/util/lemmatizer/rules/ichidan_verbs_test.dart
Normal file
14
test/util/lemmatizer/rules/ichidan_verbs_test.dart
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/rules/ichidan_verbs.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
test('Test Ichidan Verb Base Rule', () {
|
||||||
|
expect(ichidanVerbBase.matches('食べる'), true);
|
||||||
|
expect(ichidanVerbBase.apply('食べる'), ['食べる']);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('Test Ichidan Verb Negative Rule', () {
|
||||||
|
expect(ichidanVerbNegative.matches('食べない'), true);
|
||||||
|
expect(ichidanVerbNegative.apply('食べない'), ['食べる']);
|
||||||
|
});
|
||||||
|
}
|
||||||
15
test/util/lemmatizer/rules_test.dart
Normal file
15
test/util/lemmatizer/rules_test.dart
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import 'package:jadb/util/lemmatizer/lemmatizer.dart';
|
||||||
|
import 'package:jadb/util/lemmatizer/rules.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
test('Assert lemmatizerRulesByWordClass is correct', () {
|
||||||
|
for (final entry in lemmatizationRulesByWordClass.entries) {
|
||||||
|
final WordClass wordClass = entry.key;
|
||||||
|
final List<LemmatizationRule> rules = entry.value;
|
||||||
|
for (final LemmatizationRule rule in rules) {
|
||||||
|
expect(wordClass, rule.wordClass);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
@@ -37,6 +37,35 @@ void main() {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
group('Romaji -> Hiragana Spans', () {
|
||||||
|
void Function() expectSpans(String input, List<String> expected) => () {
|
||||||
|
final result = transliterateLatinToHiraganaSpan(input);
|
||||||
|
final trans = transliterateLatinToHiragana(input);
|
||||||
|
for (int i = 0; i < result.length; i++) {
|
||||||
|
expect(
|
||||||
|
trans.substring(
|
||||||
|
result[i].$2,
|
||||||
|
i == result.length - 1 ? trans.length : result[i + 1].$2,
|
||||||
|
),
|
||||||
|
expected[i],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
test('Basic test', expectSpans('katamari', ['か', 'た', 'ま', 'り']));
|
||||||
|
test(
|
||||||
|
'Basic test with diacritics',
|
||||||
|
expectSpans('gadamari', ['が', 'だ', 'ま', 'り']),
|
||||||
|
);
|
||||||
|
test('wi and we', expectSpans('wiwe', ['うぃ', 'うぇ']));
|
||||||
|
test('nb = mb', expectSpans('kanpai', ['か', 'ん', 'ぱ', 'い']));
|
||||||
|
test('nb = mb', expectSpans('kampai', ['か', 'ん', 'ぱ', 'い']));
|
||||||
|
test('Double n', expectSpans('konnichiha', ['こ', 'ん', 'に', 'ち', 'は']));
|
||||||
|
|
||||||
|
// TODO: fix the implementation
|
||||||
|
// test('Double consonant', expectSpans('kappa', ['か', 'っぱ']));
|
||||||
|
});
|
||||||
|
|
||||||
group('Hiragana -> Romaji', () {
|
group('Hiragana -> Romaji', () {
|
||||||
test('Basic test', () {
|
test('Basic test', () {
|
||||||
final result = transliterateHiraganaToLatin('かたまり');
|
final result = transliterateHiraganaToLatin('かたまり');
|
||||||
@@ -63,4 +92,31 @@ void main() {
|
|||||||
expect(result, 'kappa');
|
expect(result, 'kappa');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
group('Hiragana -> Romaji Spans', () {
|
||||||
|
void Function() expectSpans(String input, List<String> expected) => () {
|
||||||
|
final result = transliterateHiraganaToLatinSpan(input);
|
||||||
|
final trans = transliterateHiraganaToLatin(input);
|
||||||
|
for (int i = 0; i < result.length; i++) {
|
||||||
|
expect(
|
||||||
|
trans.substring(
|
||||||
|
result[i].$2,
|
||||||
|
i == result.length - 1 ? trans.length : result[i + 1].$2,
|
||||||
|
),
|
||||||
|
expected[i],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
test('Basic test', expectSpans('かたまり', ['ka', 'ta', 'ma', 'ri']));
|
||||||
|
test(
|
||||||
|
'Basic test with diacritics',
|
||||||
|
expectSpans('がだまり', ['ga', 'da', 'ma', 'ri']),
|
||||||
|
);
|
||||||
|
test('wi and we', expectSpans('うぃうぇ', ['whi', 'whe']));
|
||||||
|
test('Double n', expectSpans('こんにちは', ['ko', 'n', 'ni', 'chi', 'ha']));
|
||||||
|
|
||||||
|
// TODO: fix the implementation
|
||||||
|
// test('Double consonant', expectSpans('かっぱ', ['ka', 'ppa']));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user