Compare commits
1 Commit
word-regro
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
c70838d1bf
|
@@ -4,7 +4,7 @@ on:
|
||||
pull_request:
|
||||
push:
|
||||
jobs:
|
||||
evals:
|
||||
build:
|
||||
runs-on: debian-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
@@ -69,3 +69,6 @@ jobs:
|
||||
ssh-key: ${{ secrets.OYSTEIKT_GITEA_WEBDOCS_SSH_KEY }}
|
||||
host: microbel.pvv.ntnu.no
|
||||
known-hosts: "microbel.pvv.ntnu.no ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEq0yasKP0mH6PI6ypmuzPzMnbHELo9k+YB5yW534aKudKZS65YsHJKQ9vapOtmegrn5MQbCCgrshf+/XwZcjbM="
|
||||
|
||||
- name: Run benchmarks
|
||||
run: nix develop .# --command dart run benchmark_harness:bench --flavor jit
|
||||
|
||||
5
benchmark/benchmark.dart
Normal file
5
benchmark/benchmark.dart
Normal file
@@ -0,0 +1,5 @@
|
||||
import './search/word_search.dart';
|
||||
|
||||
/// Entry point for the benchmark suite.
///
/// Delegates to [WordSearchBenchmark.main] and completes when it does.
Future<void> main() => WordSearchBenchmark.main();
|
||||
45
benchmark/search/word_search.dart
Normal file
45
benchmark/search/word_search.dart
Normal file
@@ -0,0 +1,45 @@
|
||||
import 'package:benchmark_harness/benchmark_harness.dart';
|
||||
import 'package:jadb/search.dart';
|
||||
import 'package:sqflite_common/sqlite_api.dart';
|
||||
|
||||
import '../../test/search/setup_database_connection.dart';
|
||||
|
||||
/// Benchmark that runs word searches against a database connection.
///
/// Every [run] issues one `jadbSearchWord` call per entry in [searchTerms],
/// awaiting each call before starting the next.
class WordSearchBenchmark extends AsyncBenchmarkBase {
  /// Creates the benchmark, registered under the name `WordSearchBenchmark`.
  WordSearchBenchmark() : super('WordSearchBenchmark');

  /// Queries issued, in this order, on every [run].
  static final List<String> searchTerms = [
    'kana',
    'kanji',
    'kawaii',
    'sushi',
    'ramen',
  ];

  /// Connection the searches run against; `null` until [setup] assigns it.
  Database? connection;

  /// Runs this benchmark once via [report], printing a line before and after.
  static Future<void> main() async {
    print('Running WordSearchBenchmark...');
    final benchmark = WordSearchBenchmark();
    await benchmark.report();
    print('Finished WordSearchBenchmark');
  }

  /// Obtains the connection from `setupDatabaseConnection`, a helper imported
  /// from the test directory.
  @override
  Future<void> setup() async {
    connection = await setupDatabaseConnection();
  }

  /// Searches for each of [searchTerms] sequentially.
  @override
  Future<void> run() async {
    for (final query in searchTerms) {
      // The `!` throws if [setup] has not assigned a connection yet.
      await connection!.jadbSearchWord(query);
    }
  }

  /// Closes the connection if one was assigned.
  @override
  Future<void> teardown() async {
    await connection?.close();
  }

  // NOTE(review): disabled override kept from the original. If enabled, each
  // exercise() call would invoke run() exactly once.
  //
  // @override
  // Future<void> exercise() => run();
}
|
||||
@@ -21,83 +21,49 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
}) {
|
||||
final List<WordSearchResult> results = [];
|
||||
|
||||
final Set<int> commonEntryIds = linearWordQueryData.commonEntries
|
||||
final commonEntryIds = linearWordQueryData.commonEntries
|
||||
.map((entry) => entry['entryId'] as int)
|
||||
.toSet();
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryReadingElementsByEntryId =
|
||||
linearWordQueryData.readingElements.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryKanjiElementsByEntryId =
|
||||
linearWordQueryData.kanjiElements.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
final Map<int, int> elementIdToEntryId = {
|
||||
for (final element in linearWordQueryData.readingElements)
|
||||
element['elementId'] as int: element['entryId'] as int,
|
||||
for (final element in linearWordQueryData.kanjiElements)
|
||||
element['elementId'] as int: element['entryId'] as int,
|
||||
};
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryReadingElementInfosByEntryId =
|
||||
linearWordQueryData.readingElementInfos.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entryKanjiElementInfosByEntryId =
|
||||
linearWordQueryData.kanjiElementInfos.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>>
|
||||
entryReadingElementRestrictionsByEntryId = linearWordQueryData
|
||||
.readingElementRestrictions
|
||||
.groupListsBy(
|
||||
(element) => elementIdToEntryId[element['elementId'] as int]!,
|
||||
);
|
||||
|
||||
final Map<int, JlptLevel> entryJlptTagsByEntryId = linearWordQueryData
|
||||
.jlptTags
|
||||
.groupSetsBy((element) => element['entryId'] as int)
|
||||
.map(
|
||||
(final key, final value) => MapEntry(
|
||||
key,
|
||||
value.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?)).min,
|
||||
),
|
||||
);
|
||||
|
||||
final Map<int, List<Map<String, Object?>>> entrySensesByEntryId =
|
||||
linearWordQueryData.senses.groupListsBy(
|
||||
(element) => element['entryId'] as int,
|
||||
);
|
||||
|
||||
for (final scoredEntryId in entryIds) {
|
||||
final List<Map<String, Object?>> entryReadingElements =
|
||||
entryReadingElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryKanjiElements =
|
||||
entryKanjiElementsByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryReadingElementInfos =
|
||||
entryReadingElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryKanjiElementInfos =
|
||||
entryKanjiElementInfosByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
final List<Map<String, Object?>> entryReadingElementRestrictions =
|
||||
entryReadingElementRestrictionsByEntryId[scoredEntryId.entryId] ??
|
||||
const [];
|
||||
final List<Map<String, Object?>> entryReadingElements = linearWordQueryData
|
||||
.readingElements
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryKanjiElements = linearWordQueryData
|
||||
.kanjiElements
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final List<Map<String, Object?>> entryJlptTags = linearWordQueryData
|
||||
.jlptTags
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final jlptLevel =
|
||||
entryJlptTags
|
||||
.map((e) => JlptLevel.fromString(e['jlptLevel'] as String?))
|
||||
.sorted((a, b) => b.compareTo(a))
|
||||
.firstOrNull ??
|
||||
JlptLevel.none;
|
||||
|
||||
final isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||
|
||||
final List<Map<String, Object?>> entrySenses = linearWordQueryData.senses
|
||||
.where((element) => element['entryId'] == scoredEntryId.entryId)
|
||||
.toList();
|
||||
|
||||
final GroupedWordResult entryReadingElementsGrouped = _regroupWords(
|
||||
kanjiElements: entryKanjiElements,
|
||||
kanjiElementInfos: entryKanjiElementInfos,
|
||||
entryId: scoredEntryId.entryId,
|
||||
readingElements: entryReadingElements,
|
||||
readingElementInfos: entryReadingElementInfos,
|
||||
readingElementRestrictions: entryReadingElementRestrictions,
|
||||
kanjiElements: entryKanjiElements,
|
||||
readingElementInfos: linearWordQueryData.readingElementInfos,
|
||||
readingElementRestrictions:
|
||||
linearWordQueryData.readingElementRestrictions,
|
||||
kanjiElementInfos: linearWordQueryData.kanjiElementInfos,
|
||||
);
|
||||
|
||||
final List<Map<String, Object?>> entrySenses =
|
||||
entrySensesByEntryId[scoredEntryId.entryId] ?? const [];
|
||||
|
||||
final List<WordSearchSense> entrySensesGrouped = _regroupSenses(
|
||||
senses: entrySenses,
|
||||
senseAntonyms: linearWordQueryData.senseAntonyms,
|
||||
@@ -116,10 +82,6 @@ List<WordSearchResult> regroupWordSearchResults({
|
||||
senseAntonymsXrefData: linearWordQueryData.senseAntonymData,
|
||||
);
|
||||
|
||||
final bool isCommon = commonEntryIds.contains(scoredEntryId.entryId);
|
||||
final JlptLevel jlptLevel =
|
||||
entryJlptTagsByEntryId[scoredEntryId.entryId] ?? JlptLevel.none;
|
||||
|
||||
results.add(
|
||||
WordSearchResult(
|
||||
score: scoredEntryId.score,
|
||||
@@ -151,6 +113,7 @@ class GroupedWordResult {
|
||||
}
|
||||
|
||||
GroupedWordResult _regroupWords({
|
||||
required int entryId,
|
||||
required List<Map<String, Object?>> kanjiElements,
|
||||
required List<Map<String, Object?>> kanjiElementInfos,
|
||||
required List<Map<String, Object?>> readingElements,
|
||||
@@ -159,34 +122,36 @@ GroupedWordResult _regroupWords({
|
||||
}) {
|
||||
final List<WordSearchRuby> rubys = [];
|
||||
|
||||
final Map<int, Set<String>> readingElementRestrictionsSet =
|
||||
readingElementRestrictions
|
||||
.groupSetsBy((element) => element['elementId'] as int)
|
||||
.map(
|
||||
(key, value) => MapEntry(
|
||||
key,
|
||||
value.map((e) => e['restriction'] as String).toSet(),
|
||||
),
|
||||
);
|
||||
final kanjiElements_ = kanjiElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
// Construct a cartesian product of kanji + readings, with exceptions made for items marked in `restrictions`.
|
||||
for (final readingElement in readingElements) {
|
||||
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements.isEmpty) {
|
||||
final readingElements_ = readingElements
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final readingElementRestrictions_ = readingElementRestrictions
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
for (final readingElement in readingElements_) {
|
||||
if (readingElement['doesNotMatchKanji'] == 1 || kanjiElements_.isEmpty) {
|
||||
final ruby = WordSearchRuby(base: readingElement['reading'] as String);
|
||||
rubys.add(ruby);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
for (final kanjiElement in kanjiElements) {
|
||||
for (final kanjiElement in kanjiElements_) {
|
||||
final kanji = kanjiElement['reading'] as String;
|
||||
final reading = readingElement['reading'] as String;
|
||||
|
||||
// The 'restrictions' act as an allowlist, meaning that non-matching kanji elements should be ignored.
|
||||
final restrictions =
|
||||
readingElementRestrictionsSet[readingElement['elementId'] as int] ??
|
||||
{};
|
||||
if (restrictions.isNotEmpty && !restrictions.contains(kanji)) {
|
||||
final restrictions = readingElementRestrictions_
|
||||
.where((element) => element['reading'] == reading)
|
||||
.toList();
|
||||
|
||||
if (restrictions.isNotEmpty &&
|
||||
!restrictions.any((element) => element['restriction'] == kanji)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -195,30 +160,35 @@ GroupedWordResult _regroupWords({
|
||||
}
|
||||
}
|
||||
|
||||
assert(
|
||||
rubys.isNotEmpty,
|
||||
'No readings found for entryId: ${kanjiElements.firstOrNull?['entryId'] ?? readingElements.firstOrNull?['entryId'] ?? '???'}',
|
||||
);
|
||||
assert(rubys.isNotEmpty, 'No readings found for entryId: $entryId');
|
||||
|
||||
final Map<int, String> readingElementIdsToReading = {
|
||||
for (final element in readingElements)
|
||||
for (final element in readingElements_)
|
||||
element['elementId'] as int: element['reading'] as String,
|
||||
};
|
||||
|
||||
final Map<int, String> kanjiElementIdsToReading = {
|
||||
for (final element in kanjiElements)
|
||||
for (final element in kanjiElements_)
|
||||
element['elementId'] as int: element['reading'] as String,
|
||||
};
|
||||
|
||||
final readingElementInfos_ = readingElementInfos
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
final kanjiElementInfos_ = kanjiElementInfos
|
||||
.where((element) => element['entryId'] == entryId)
|
||||
.toList();
|
||||
|
||||
return GroupedWordResult(
|
||||
rubys: rubys,
|
||||
readingInfos: {
|
||||
for (final rei in readingElementInfos)
|
||||
for (final rei in readingElementInfos_)
|
||||
readingElementIdsToReading[rei['elementId'] as int]!:
|
||||
JMdictReadingInfo.fromId(rei['info'] as String),
|
||||
},
|
||||
kanjiInfos: {
|
||||
for (final kei in kanjiElementInfos)
|
||||
for (final kei in kanjiElementInfos_)
|
||||
kanjiElementIdsToReading[kei['elementId'] as int]!:
|
||||
JMdictKanjiInfo.fromId(kei['info'] as String),
|
||||
},
|
||||
|
||||
@@ -33,6 +33,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.13.0"
|
||||
benchmark_harness:
|
||||
dependency: "direct dev"
|
||||
description:
|
||||
name: benchmark_harness
|
||||
sha256: a2d3c4c83cac0126bf38e41eaf7bd9ed4f6635f1ee1a0cbc6f79fa9736c62cbd
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.4.0"
|
||||
boolean_selector:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
||||
@@ -18,6 +18,7 @@ dependencies:
|
||||
xml: ^6.5.0
|
||||
|
||||
dev_dependencies:
|
||||
benchmark_harness: ^2.4.0
|
||||
lints: ^6.0.0
|
||||
test: ^1.25.15
|
||||
|
||||
|
||||
Reference in New Issue
Block a user