init commit
bin/common.dart (new file, empty)
bin/ja_db.dart (new file, 16 lines)
@@ -0,0 +1,16 @@
import 'dart:io';

import 'package:sqflite_common_ffi/sqflite_ffi.dart';

import 'jmdict/parser.dart';
import 'kanjidic/parser.dart';
import 'radkfile/parser.dart';

Future<void> main(List<String> arguments) async {
  final db = await databaseFactoryFfi
      .openDatabase(Directory.current.uri.resolve('main.db').path);
  await addDataFromJMdict(db);
  await addDataFromRADKFILE(db);
  await addDataFromKANJIDIC(db);
}
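Everything ends up in main.db in the working directory. A minimal sketch (not part of this commit) of querying the result afterwards; it assumes the schema for the tables targeted by the inserts already exists in main.db (the CREATE TABLE statements are not in this commit), and uses the JMdict_EntryByKana table and its entryId/kana columns exactly as written by bin/jmdict/parser.dart:

import 'package:sqflite_common_ffi/sqflite_ffi.dart';

// Sketch only: look up JMdict entry ids by a romaji reading in the generated main.db.
Future<void> main() async {
  final db = await databaseFactoryFfi.openDatabase('main.db');
  final rows = await db.query(
    'JMdict_EntryByKana',
    where: 'kana = ?',
    whereArgs: ['neko'], // readings are stored via transliterateKanaToLatin
  );
  print(rows.map((r) => r['entryId']).toList());
  await db.close();
}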
bin/jmdict/objects.dart (new file, 235 lines)
@@ -0,0 +1,235 @@
import '../common.dart';
import '../objects.dart';

class TableNames {
  static const String entry = 'JMdict_Entry';
  static const String entryByKana = 'JMdict_EntryByKana';
  static const String entryByEnglish = 'JMdict_EntryByEnglish';
  static const String kanjiElement = 'JMdict_KanjiElement';
  static const String kanjiInfo = 'JMdict_KanjiElementInfo';
  static const String readingElement = 'JMdict_ReadingElement';
  static const String readingInfo = 'JMdict_ReadingElementInfo';
  static const String readingRestriction = 'JMdict_ReadingElementRestriction';
  static const String sense = 'JMdict_Sense';
  static const String senseAntonyms = 'JMdict_SenseAntonym';
  static const String senseDialect = 'JMdict_SenseDialect';
  static const String senseField = 'JMdict_SenseField';
  static const String senseGlossary = 'JMdict_SenseGlossary';
  static const String senseInfo = 'JMdict_SenseInfo';
  static const String senseLanguageSource = 'JMdict_SenseLanguageSource';
  static const String senseMisc = 'JMdict_SenseMisc';
  static const String sensePOS = 'JMdict_SensePOS';
  static const String senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji';
  static const String senseRestrictedToReading = 'JMdict_SenseRestrictedToReading';
  static const String senseSeeAlso = 'JMdict_SenseSeeAlso';
}

abstract class Element extends SQLWritable {
  final String reading;
  final int? news;
  final int? ichi;
  final int? spec;
  final int? gai;
  final int? nf;
  const Element({
    required this.reading,
    this.news,
    this.ichi,
    this.spec,
    this.gai,
    this.nf,
  });

  Map<String, Object?> get sqlValue => {
        'reading': reading,
        'news': news,
        'ichi': ichi,
        'spec': spec,
        'gai': gai,
        'nf': nf,
      };
}

class KanjiElement extends Element {
  List<String> info;

  KanjiElement({
    this.info = const [],
    required String reading,
    int? news,
    int? ichi,
    int? spec,
    int? gai,
    int? nf,
  }) : super(
          reading: reading,
          news: news,
          ichi: ichi,
          spec: spec,
          gai: gai,
          nf: nf,
        );
}

class ReadingElement extends Element {
  List<String> info;
  List<String> restrictions;

  ReadingElement({
    this.info = const [],
    this.restrictions = const [],
    required String reading,
    int? news,
    int? ichi,
    int? spec,
    int? gai,
    int? nf,
  }) : super(
          reading: reading,
          news: news,
          ichi: ichi,
          spec: spec,
          gai: gai,
          nf: nf,
        );
}

class LanguageSource extends SQLWritable {
  final String language;
  final String? phrase;
  final bool fullyDescribesSense;
  final bool constructedFromSmallerWords;

  const LanguageSource({
    required this.language,
    this.phrase,
    this.fullyDescribesSense = true,
    this.constructedFromSmallerWords = false,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'language': language,
        'phrase': phrase,
        'fullyDescribesSense': fullyDescribesSense,
        'constructedFromSmallerWords': constructedFromSmallerWords,
      };
}

class Glossary extends SQLWritable {
  final String language;
  final String phrase;
  final String? type;

  const Glossary({
    required this.language,
    required this.phrase,
    this.type,
  });

  Map<String, Object?> get sqlValue => {
        'language': language,
        'phrase': phrase,
        'type': type,
      };
}

final kanaRegex =
    RegExp(r'^[\p{Script=Katakana}\p{Script=Hiragana}ー]+$', unicode: true);

class XRefParts {
  final String? kanjiRef;
  final String? readingRef;
  final int? senseNum;

  const XRefParts({
    this.kanjiRef,
    this.readingRef,
    this.senseNum,
  }) : assert(kanjiRef != null || readingRef != null);

  factory XRefParts.fromString(String s) {
    final parts = s.split('・');
    if (parts.length == 1) {
      if (parts[0].contains(kanaRegex)) {
        return XRefParts(readingRef: parts[0]);
      }
      return XRefParts(kanjiRef: parts[0]);
    } else if (parts.length == 2) {
      if (int.tryParse(parts[1]) != null) {
        if (parts[0].contains(kanaRegex)) {
          return XRefParts(readingRef: parts[0], senseNum: int.parse(parts[1]));
        }
        return XRefParts(kanjiRef: parts[0], senseNum: int.parse(parts[1]));
      }
      return XRefParts(kanjiRef: parts[0], readingRef: parts[1]);
    } else if (parts.length == 3) {
      return XRefParts(
        kanjiRef: parts[0],
        readingRef: parts[1],
        senseNum: int.parse(parts[2]),
      );
    }

    return XRefParts();
  }
}

class XRef {
  final String entryId;
  final String reading;

  const XRef({
    required this.entryId,
    required this.reading,
  });
}

class Sense extends SQLWritable {
  final int id;
  final List<XRefParts> antonyms;
  final List<String> dialects;
  final List<String> fields;
  final List<String> info;
  final List<LanguageSource> languageSource;
  final List<Glossary> glossary;
  final List<String> misc;
  final List<String> pos;
  final List<String> restrictedToKanji;
  final List<String> restrictedToReading;
  final List<XRefParts> seeAlso;

  const Sense({
    required this.id,
    this.antonyms = const [],
    this.dialects = const [],
    this.fields = const [],
    this.info = const [],
    this.languageSource = const [],
    this.glossary = const [],
    this.misc = const [],
    this.pos = const [],
    this.restrictedToKanji = const [],
    this.restrictedToReading = const [],
    this.seeAlso = const [],
  });

  @override
  Map<String, Object?> get sqlValue => {};
}

class Entry extends SQLWritable {
  final int id;
  final List<KanjiElement> kanji;
  final List<ReadingElement> readings;
  final List<Sense> senses;

  const Entry({
    required this.id,
    required this.kanji,
    required this.readings,
    required this.senses,
  });

  Map<String, Object?> get sqlValue => {'id': id};
}
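XRefParts.fromString has to guess whether each '・'-separated piece of a JMdict cross-reference is a kanji form, a kana reading, or a sense number. A hedged sketch of the three shapes it distinguishes (the example words are illustrative, not taken from the data file):

// Illustrative only: how the '・'-separated xref strings are interpreted.
final a = XRefParts.fromString('可哀想');               // kanji form only
final b = XRefParts.fromString('かわいそう・2');         // kana reading + sense number (kanaRegex matches)
final c = XRefParts.fromString('可哀想・かわいそう・2');  // kanji + reading + sense number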
bin/jmdict/parser.dart (new file, 346 lines)
@@ -0,0 +1,346 @@
import 'dart:collection';
import 'dart:io';

import 'package:sqflite_common/sqlite_api.dart';
import 'package:xml/xml.dart';

import '../romaji_transliteration.dart';
import 'objects.dart';

List<int?> getPriNums(XmlElement e, String prefix) {
  int? news, ichi, spec, gai, nf;
  for (final pri in e.findElements('${prefix}_pri')) {
    final txt = pri.innerText;
    if (txt.startsWith('news'))
      news = int.parse(txt.substring(4));
    else if (txt.startsWith('ichi'))
      ichi = int.parse(txt.substring(4));
    else if (txt.startsWith('spec'))
      spec = int.parse(txt.substring(4));
    else if (txt.startsWith('gai'))
      gai = int.parse(txt.substring(3));
    else if (txt.startsWith('nf')) nf = int.parse(txt.substring(2));
  }
  return [news, ichi, spec, gai, nf];
}

List<Entry> transformXML(XmlElement root) {
  final List<Entry> entries = [];

  int senseId = 0;
  for (final entry in root.childElements) {
    final entryId = int.parse(entry.findElements('ent_seq').first.innerText);

    final List<KanjiElement> kanjiEls = [];
    final List<ReadingElement> readingEls = [];
    final List<Sense> senses = [];

    for (final k_ele in entry.findAllElements('k_ele')) {
      final ke_pri = getPriNums(k_ele, 'ke');
      kanjiEls.add(
        KanjiElement(
          info: k_ele.findElements('ke_inf').map((e) => e.innerText).toList(),
          reading: k_ele.findElements('keb').first.innerText,
          news: ke_pri[0],
          ichi: ke_pri[1],
          spec: ke_pri[2],
          gai: ke_pri[3],
          nf: ke_pri[4],
        ),
      );
    }

    for (final r_ele in entry.findAllElements('r_ele')) {
      final re_pri = getPriNums(r_ele, 're');
      readingEls.add(
        ReadingElement(
          info: r_ele
              .findElements('re_inf')
              .map((e) => e.innerText.substring(1, e.innerText.length - 1))
              .toList(),
          restrictions:
              r_ele.findElements('re_restr').map((e) => e.innerText).toList(),
          reading: r_ele.findElements('reb').first.innerText,
          news: re_pri[0],
          ichi: re_pri[1],
          spec: re_pri[2],
          gai: re_pri[3],
          nf: re_pri[4],
        ),
      );
    }

    for (final sense in entry.findAllElements('sense')) {
      senseId++;
      senses.add(
        Sense(
          id: senseId,
          restrictedToKanji:
              sense.findElements('stagk').map((e) => e.innerText).toList(),
          restrictedToReading:
              sense.findElements('stagr').map((e) => e.innerText).toList(),
          pos: sense
              .findElements('pos')
              .map((e) => e.innerText.substring(1, e.innerText.length - 1))
              .toList(),
          misc: sense
              .findElements('misc')
              .map((e) => e.innerText.substring(1, e.innerText.length - 1))
              .toList(),
          dialects: sense
              .findElements('dial')
              .map((e) => e.innerText.substring(1, e.innerText.length - 1))
              .toList(),
          info: sense.findElements('s_inf').map((e) => e.innerText).toList(),
          languageSource: sense
              .findElements('lsource')
              .map(
                (e) => LanguageSource(
                  language: e.getAttribute('xml:lang') ?? 'eng',
                  fullyDescribesSense: e.getAttribute('ls_type') == 'part',
                  constructedFromSmallerWords:
                      e.getAttribute('ls_wasei') == 'y',
                ),
              )
              .toList(),
          glossary: sense
              .findElements('gloss')
              .map(
                (e) => Glossary(
                  language: e.getAttribute('xml:lang') ?? 'eng',
                  phrase: e.innerText,
                  type: e.getAttribute('g_type'),
                ),
              )
              .toList(),
          antonyms: sense
              .findElements('ant')
              .map((e) => XRefParts.fromString(e.innerText))
              .toList(),
          seeAlso: sense
              .findElements('xref')
              .map((e) => XRefParts.fromString(e.innerText))
              .toList(),
        ),
      );
    }

    entries.add(
      Entry(
        id: entryId,
        kanji: kanjiEls,
        readings: readingEls,
        senses: senses,
      ),
    );
  }

  return entries;
}

Future<void> insertIntoDB(List<Entry> entries, Database db) async {
  print(' [JMdict] Batch 1');
  Batch b = db.batch();
  for (final e in entries) {
    b.insert(TableNames.entry, e.sqlValue);
    for (final k in e.kanji) {
      b.insert(TableNames.kanjiElement, k.sqlValue..addAll({'entryId': e.id}));
      // b.insert(
      //   TableNames.entryByKana,
      //   {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)},
      //   // Some entries have the same reading twice with difference in katakana and hiragana
      //   conflictAlgorithm: ConflictAlgorithm.ignore,
      // );
      for (final i in k.info) {
        b.insert(
          TableNames.kanjiInfo,
          {'entryId': e.id, 'reading': k.reading, 'info': i},
        );
      }
    }
    for (final r in e.readings) {
      b.insert(
        TableNames.readingElement,
        r.sqlValue..addAll({'entryId': e.id}),
      );

      b.insert(
        TableNames.entryByKana,
        {'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading)},
        // Some entries have the same reading twice with difference in katakana and hiragana
        conflictAlgorithm: ConflictAlgorithm.ignore,
      );
      for (final i in r.info) {
        b.insert(
          TableNames.readingInfo,
          {'entryId': e.id, 'reading': r.reading, 'info': i},
        );
      }
      for (final res in r.restrictions) {
        b.insert(
          TableNames.readingRestriction,
          {'entryId': e.id, 'reading': r.reading, 'restriction': res},
        );
      }
    }

    for (final s in e.senses) {
      for (final g in s.glossary) {
        if (g.language == "eng")
          b.insert(
            TableNames.entryByEnglish,
            {'entryId': e.id, 'english': g.phrase},
            // Some entries have the same reading twice with difference in katakana and hiragana
            conflictAlgorithm: ConflictAlgorithm.ignore,
          );
      }
    }
  }

  await b.commit();

  print(' [JMdict] Building trees');
  SplayTreeMap<String, Set<Entry>> entriesByKanji = SplayTreeMap();
  for (final entry in entries) {
    for (final kanji in entry.kanji) {
      if (entriesByKanji.containsKey(kanji.reading)) {
        entriesByKanji.update(kanji.reading, (list) => list..add(entry));
      } else {
        entriesByKanji.putIfAbsent(kanji.reading, () => {entry});
      }
    }
  }
  SplayTreeMap<String, Set<Entry>> entriesByReading = SplayTreeMap();
  for (final entry in entries) {
    for (final reading in entry.readings) {
      if (entriesByReading.containsKey(reading.reading)) {
        entriesByReading.update(reading.reading, (list) => list..add(entry));
      } else {
        entriesByReading.putIfAbsent(reading.reading, () => {entry});
      }
    }
  }

  print(' [JMdict] Batch 2');
  b = db.batch();

  for (final e in entries) {
    for (final s in e.senses) {
      b.insert(
          TableNames.sense, s.sqlValue..addAll({'id': s.id, 'entryId': e.id}));

      for (final d in s.dialects) {
        b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d});
      }
      for (final f in s.fields) {
        b.insert(TableNames.senseField, {'senseId': s.id, 'field': f});
      }
      for (final i in s.info) {
        b.insert(TableNames.senseInfo, {'senseId': s.id, 'info': i});
      }
      for (final m in s.misc) {
        b.insert(TableNames.senseMisc, {'senseId': s.id, 'misc': m});
      }
      for (final p in s.pos) {
        b.insert(TableNames.sensePOS, {'senseId': s.id, 'pos': p});
      }
      for (final l in s.languageSource) {
        b.insert(
          TableNames.senseLanguageSource,
          l.sqlValue..addAll({'senseId': s.id}),
        );
      }
      for (final rk in s.restrictedToKanji) {
        b.insert(
          TableNames.senseRestrictedToKanji,
          {'entryId': e.id, 'senseId': s.id, 'kanji': rk},
        );
      }
      for (final rr in s.restrictedToReading) {
        b.insert(
          TableNames.senseRestrictedToReading,
          {'entryId': e.id, 'senseId': s.id, 'reading': rr},
        );
      }
      for (final g in s.glossary) {
        if (g.language == 'eng')
          b.insert(
            TableNames.senseGlossary,
            g.sqlValue..addAll({'senseId': s.id}),
            // There are some duplicate glossary, especially in
            // the other languages.
            conflictAlgorithm: ConflictAlgorithm.ignore,
          );
      }

      for (final xref in s.seeAlso) {
        final Set<Entry> entries;
        if (xref.kanjiRef != null && xref.readingRef != null) {
          entries = entriesByKanji[xref.kanjiRef]!
              .difference(entriesByReading[xref.readingRef]!);
        } else if (xref.kanjiRef != null) {
          entries = entriesByKanji[xref.kanjiRef]!;
        } else {
          entries = entriesByReading[xref.readingRef]!;
        }
        for (final ex in entries)
          if (!(xref.senseNum != null && xref.senseNum! > ex.senses.length)) {
            b.insert(
              TableNames.senseSeeAlso,
              {
                'senseId': s.id,
                'xrefEntryId': ex.id,
                'seeAlsoKanji': xref.kanjiRef,
                'seeAlsoReading': xref.readingRef,
                'seeAlsoSense': xref.senseNum,
              },
            );
          }
      }
      for (final ant in s.antonyms) {
        final Set<Entry> entries;
        if (ant.kanjiRef != null && ant.readingRef != null) {
          entries = entriesByKanji[ant.kanjiRef]!
              .difference(entriesByReading[ant.readingRef]!);
        } else if (ant.kanjiRef != null) {
          entries = entriesByKanji[ant.kanjiRef]!;
        } else {
          entries = entriesByReading[ant.readingRef]!;
        }
        for (final ex in entries) {
          if (!(ant.senseNum != null && ant.senseNum! > ex.senses.length)) {
            b.insert(TableNames.senseAntonyms, {
              'senseId': s.id,
              'xrefEntryId': ex.id,
              'antonymKanji': ant.kanjiRef,
              'antonymReading': ant.readingRef,
              'antonymSense': ant.senseNum,
            });
          }
        }
      }
    }
  }

  await b.commit();
}

Future<void> addDataFromJMdict(Database db) async {
  print('[JMdict] Reading file...');
  String rawXML = File('data/JMdict.xml').readAsStringSync();

  print('[JMdict] Parsing XML...');
  XmlElement root = XmlDocument.parse(rawXML).getElement('JMdict')!;

  print('[JMdict] Transforming data...');
  final entries = transformXML(root);

  print('[JMdict] Writing to database...');
  await insertIntoDB(entries, db);
}
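For orientation, transformXML walks the standard JMdict layout; an abridged, purely illustrative entry using the element names read above (ent_seq, k_ele/keb/ke_pri, r_ele/reb, sense/pos/gloss/xref) could look like the constant below. The substring(1, length - 1) calls in the sense parsing suggest that entity references such as &n; arrive unexpanded and simply have their leading '&' and trailing ';' trimmed.

// Abridged, illustrative JMdict entry (placeholder ent_seq, not taken from the data file).
const exampleJMdictEntry = '''
<entry>
  <ent_seq>1000000</ent_seq>
  <k_ele><keb>猫</keb><ke_pri>ichi1</ke_pri></k_ele>
  <r_ele><reb>ねこ</reb><re_pri>ichi1</re_pri></r_ele>
  <sense>
    <pos>&n;</pos>
    <gloss>cat</gloss>
    <xref>猫・ねこ・1</xref>
  </sense>
</entry>
''';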
bin/kanjidic/objects.dart (new file, 284 lines)
@@ -0,0 +1,284 @@
import '../objects.dart';

class TableNames {
  static const String character = 'KANJIDIC_Character';
  static const String radicalName = 'KANJIDIC_RadicalName';
  static const String codepoint = 'KANJIDIC_Codepoint';
  static const String radical = 'KANJIDIC_Radical';
  static const String strokeMiscount = 'KANJIDIC_StrokeMiscount';
  static const String variant = 'KANJIDIC_Variant';
  static const String dictionaryReference = '_KANJIDIC_DictionaryReference_Part1';
  static const String dictionaryReferenceMoro = '_KANJIDIC_DictionaryReference_Moro';
  static const String queryCode = 'KANJIDIC_QueryCode';
  static const String reading = 'KANJIDIC_Reading';
  static const String kunyomi = 'KANJIDIC_Kunyomi';
  static const String onyomi = 'KANJIDIC_Onyomi';
  static const String meaning = 'KANJIDIC_Meaning';
  static const String nanori = 'KANJIDIC_Nanori';
}

class CodePoint extends SQLWritable {
  final String kanji;
  final String type;
  final String codepoint;

  const CodePoint({
    required this.kanji,
    required this.type,
    required this.codepoint,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'type': type,
        'codepoint': codepoint,
      };
}

class Radical extends SQLWritable {
  final String kanji;
  final String type;
  final String radical;

  const Radical({
    required this.kanji,
    required this.type,
    required this.radical,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'type': type,
        'radical': radical,
      };
}

class StrokeMiscount extends SQLWritable {
  final String kanji;
  final int strokeCount;

  const StrokeMiscount({
    required this.kanji,
    required this.strokeCount,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'strokeCount': strokeCount,
      };
}

class Variant extends SQLWritable {
  final String kanji;
  final String type;
  final String variant;

  const Variant({
    required this.kanji,
    required this.type,
    required this.variant,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'type': type,
        'variant': variant,
      };
}

class DictionaryReference extends SQLWritable {
  final String kanji;
  final String type;
  final String ref;

  const DictionaryReference({
    required this.kanji,
    required this.type,
    required this.ref,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'type': type,
        'ref': ref,
      };
}

class DictionaryReferenceMoro extends SQLWritable {
  final String kanji;
  final String ref;
  final int? volume;
  final int? page;

  const DictionaryReferenceMoro({
    required this.kanji,
    required this.ref,
    required this.volume,
    required this.page,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'ref': ref,
        'volume': volume,
        'page': page,
      };
}

class QueryCode extends SQLWritable {
  final String kanji;
  final String code;
  final String type;
  final String? skipMisclassification;

  const QueryCode({
    required this.kanji,
    required this.code,
    required this.type,
    required this.skipMisclassification,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'code': code,
        'type': type,
        'skipMisclassification': skipMisclassification,
      };
}

class Reading extends SQLWritable {
  final String kanji;
  final String type;
  final String reading;

  const Reading({
    required this.kanji,
    required this.type,
    required this.reading,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'type': type,
        'reading': reading,
      };
}

class Kunyomi extends SQLWritable {
  final String kanji;
  final String yomi;
  final bool isJouyou;

  const Kunyomi({
    required this.kanji,
    required this.yomi,
    required this.isJouyou,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'yomi': yomi,
        'isJouyou': isJouyou,
      };
}

class Onyomi extends SQLWritable {
  final String kanji;
  final String yomi;
  final bool isJouyou;
  final String? type;

  const Onyomi({
    required this.kanji,
    required this.yomi,
    required this.isJouyou,
    required this.type,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'yomi': yomi,
        'isJouyou': isJouyou,
        'type': type,
      };
}

class Meaning extends SQLWritable {
  final String kanji;
  final String language;
  final String meaning;
  const Meaning({
    required this.kanji,
    // kanjidic2 meanings without an m_lang attribute are English.
    this.language = 'eng',
    required this.meaning,
  });

  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'language': language,
        'meaning': meaning,
      };
}

class Character extends SQLWritable {
  final String literal;
  final int strokeCount;
  final int? grade;
  final int? frequency;
  final int? jlpt;

  final List<String> radicalName;
  final List<CodePoint> codepoints;
  final List<Radical> radicals;
  final List<int> strokeMiscounts;
  final List<Variant> variants;
  final List<DictionaryReference> dictionaryReferences;
  final List<DictionaryReferenceMoro> dictionaryReferencesMoro;
  final List<QueryCode> querycodes;
  final List<Reading> readings;
  final List<Onyomi> onyomi;
  final List<Kunyomi> kunyomi;
  final List<Meaning> meanings;
  final List<String> nanori;

  const Character({
    required this.literal,
    required this.strokeCount,
    this.grade,
    this.frequency,
    this.jlpt,
    this.radicalName = const [],
    this.codepoints = const [],
    this.radicals = const [],
    this.strokeMiscounts = const [],
    this.variants = const [],
    this.dictionaryReferences = const [],
    this.dictionaryReferencesMoro = const [],
    this.querycodes = const [],
    this.readings = const [],
    this.onyomi = const [],
    this.kunyomi = const [],
    this.meanings = const [],
    this.nanori = const [],
  });

  Map<String, Object?> get sqlValue => {
        'literal': literal,
        'grade': grade,
        'strokeCount': strokeCount,
        'frequency': frequency,
        'jlpt': jlpt,
      };
}
bin/kanjidic/parser.dart (new file, 231 lines)
@@ -0,0 +1,231 @@
import 'dart:io';

import 'package:sqflite_common/sqlite_api.dart';
import 'package:xml/xml.dart';
import 'package:collection/collection.dart';

import 'objects.dart';

List<Character> transformXML(XmlElement root) {
  final List<Character> result = [];
  for (final c in root.findElements('character')) {
    final kanji = c.findElements('literal').first.innerText;
    result.add(
      Character(
        literal: kanji,
        strokeCount:
            int.parse(c.findAllElements('stroke_count').first.innerText),
        grade:
            int.tryParse(c.findElements('grade').firstOrNull?.innerText ?? ''),
        frequency:
            int.tryParse(c.findElements('freq').firstOrNull?.innerText ?? ''),
        jlpt: int.tryParse(
          // JLPT level comes from the <jlpt> element of kanjidic2.
          c.findElements('jlpt').firstOrNull?.innerText ?? '',
        ),
        radicalName:
            c.findElements('rad_name').map((e) => e.innerText).toList(),
        codepoints: c
            .findAllElements('cp_value')
            .map(
              (e) => CodePoint(
                kanji: kanji,
                type: e.getAttribute('cp_type')!,
                codepoint: e.innerText,
              ),
            )
            .toList(),
        radicals: c
            .findAllElements('rad_value')
            .map(
              (e) => Radical(
                kanji: kanji,
                type: e.getAttribute('rad_type')!,
                radical: e.innerText,
              ),
            )
            .toList(),
        strokeMiscounts: c
            .findAllElements('stroke_count')
            .skip(1)
            .map((e) => int.parse(e.innerText))
            .toList(),
        variants: c
            .findAllElements('variant')
            .map(
              (e) => Variant(
                kanji: kanji,
                type: e.getAttribute('var_type')!,
                variant: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferences: c
            .findAllElements('dic_ref')
            .where((e) => e.getAttribute('dr_type') != 'moro')
            .map(
              (e) => DictionaryReference(
                kanji: kanji,
                type: e.getAttribute('dr_type')!,
                ref: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferencesMoro: c
            .findAllElements('dic_ref')
            .where((e) => e.getAttribute('dr_type') == 'moro')
            .map(
              (e) => DictionaryReferenceMoro(
                kanji: kanji,
                ref: e.innerText,
                page: int.tryParse(e.getAttribute('m_page') ?? ''),
                volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
              ),
            )
            .toList(),
        querycodes: c
            .findAllElements('q_code')
            .map(
              (e) => QueryCode(
                kanji: kanji,
                code: e.innerText,
                type: e.getAttribute('qc_type')!,
                skipMisclassification: e.getAttribute('skip_misclass'),
              ),
            )
            .toList(),
        readings: c
            .findAllElements('reading')
            .where(
              (e) => !['ja_on', 'ja_kun'].contains(e.getAttribute('r_type')),
            )
            .map(
              (e) => Reading(
                kanji: kanji,
                type: e.getAttribute('r_type')!,
                reading: e.innerText,
              ),
            )
            .toList(),
        kunyomi: c
            .findAllElements('reading')
            .where((e) => e.getAttribute('r_type') == 'ja_kun')
            .map(
              (e) => Kunyomi(
                kanji: kanji,
                yomi: e.innerText,
                isJouyou: e.getAttribute('r_status') == 'jy',
              ),
            )
            .toList(),
        onyomi: c
            .findAllElements('reading')
            .where((e) => e.getAttribute('r_type') == 'ja_on')
            .map(
              (e) => Onyomi(
                  kanji: kanji,
                  yomi: e.innerText,
                  isJouyou: e.getAttribute('r_status') == 'jy',
                  type: e.getAttribute('on_type')),
            )
            .toList(),
        meanings: c
            .findAllElements('meaning')
            .map(
              (e) => Meaning(
                kanji: kanji,
                language: e.getAttribute('m_lang') ?? 'eng',
                meaning: e.innerText,
              ),
            )
            .toList(),
        nanori: c.findAllElements('nanori').map((e) => e.innerText).toList(),
      ),
    );
  }
  return result;
}

Future<void> insertIntoDB(List<Character> characters, Database db) async {
  final b = db.batch();
  for (final c in characters) {
    // if (c.dictionaryReferences.any((e) =>
    //     c.dictionaryReferences
    //             .where((e2) => e.kanji == e2.kanji && e.type == e2.type)
    //             .length >
    //         1)) {
    //   print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
    // }
    b.insert(TableNames.character, c.sqlValue);
    for (final n in c.radicalName) {
      b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n});
    }
    for (final cp in c.codepoints) {
      b.insert(TableNames.codepoint, cp.sqlValue);
    }
    for (final r in c.radicals) {
      b.insert(TableNames.radical, r.sqlValue);
    }
    for (final sm in c.strokeMiscounts) {
      b.insert(
        TableNames.strokeMiscount,
        {
          'kanji': c.literal,
          'strokeCount': sm,
        },
      );
    }
    for (final v in c.variants) {
      b.insert(TableNames.variant, v.sqlValue);
    }
    for (final dr in c.dictionaryReferences) {
      // There are duplicate entries here
      b.insert(
        TableNames.dictionaryReference,
        dr.sqlValue,
        conflictAlgorithm: ConflictAlgorithm.ignore,
      );
    }
    for (final drm in c.dictionaryReferencesMoro) {
      b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue);
    }
    for (final q in c.querycodes) {
      b.insert(TableNames.queryCode, q.sqlValue);
    }
    for (final r in c.readings) {
      b.insert(TableNames.reading, r.sqlValue);
    }
    for (final k in c.kunyomi) {
      b.insert(TableNames.kunyomi, k.sqlValue);
    }
    for (final o in c.onyomi) {
      b.insert(TableNames.onyomi, o.sqlValue);
    }
    for (final m in c.meanings) {
      b.insert(TableNames.meaning, m.sqlValue);
    }
    for (final n in c.nanori) {
      b.insert(
        TableNames.nanori,
        {
          'kanji': c.literal,
          'nanori': n,
        },
      );
    }
  }
  await b.commit();
}

Future<void> addDataFromKANJIDIC(Database db) async {
  print('[KANJIDIC2] Reading file...');
  String rawXML = File('data/kanjidic2.xml').readAsStringSync();

  print('[KANJIDIC2] Parsing XML...');
  XmlElement root = XmlDocument.parse(rawXML).getElement('kanjidic2')!;

  print('[KANJIDIC2] Transforming data...');
  final entries = transformXML(root);

  print('[KANJIDIC2] Writing to database...');
  await insertIntoDB(entries, db);
}
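A similar hedged sketch for reading the kanji data back; table and column names match the insert maps above (KANJIDIC_Character: literal/strokeCount, KANJIDIC_Meaning: kanji/language/meaning), and the schema is again assumed to exist in main.db already:

import 'package:sqflite_common_ffi/sqflite_ffi.dart';

// Sketch only: join one kanji row with its English meanings.
Future<void> main() async {
  final db = await databaseFactoryFfi.openDatabase('main.db');
  final rows = await db.rawQuery(
    'SELECT ch.literal, ch.strokeCount, m.meaning '
    'FROM KANJIDIC_Character ch '
    "JOIN KANJIDIC_Meaning m ON m.kanji = ch.literal AND m.language = 'eng' "
    'WHERE ch.literal = ?',
    ['猫'],
  );
  print(rows);
  await db.close();
}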
bin/objects.dart (new file, 5 lines)
@@ -0,0 +1,5 @@
abstract class SQLWritable {
  const SQLWritable();

  Map<String, Object?> get sqlValue;
}
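SQLWritable is the one-method contract the parsers rely on: a model exposes its row as a column-to-value map, which is handed straight to Batch.insert. A hypothetical extra model (ExampleRow and its columns are made up for illustration, not part of the commit) would follow the same pattern:

// Hypothetical model: anything extending SQLWritable can be written by the
// parsers' batch code as b.insert(tableName, model.sqlValue).
class ExampleRow extends SQLWritable {
  final int id;
  final String label;

  const ExampleRow({required this.id, required this.label});

  @override
  Map<String, Object?> get sqlValue => {'id': id, 'label': label};
}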
bin/radkfile/objects.dart (new file, 13 lines)
@@ -0,0 +1,13 @@
class Radical {
  final String radical;
  final String kanji;

  // TODO:
  final String something;

  const Radical({
    required this.radical,
    required this.kanji,
    required this.something,
  });
}
bin/radkfile/parser.dart (new file, 32 lines)
@@ -0,0 +1,32 @@
import 'dart:io';

import 'package:sqflite_common/sqlite_api.dart';

Future<void> addDataFromRADKFILE(Database db) async {
  final String content = File('data/radkfile_utf8').readAsStringSync();
  final Iterable<String> blocks =
      content.replaceAll(RegExp(r'^#.*$'), '').split(r'$').skip(2);

  print('[RADKFILE] Writing to database...');
  final b = db.batch();

  for (final block in blocks) {
    final String radical = block[1];
    final List<String> kanjiList = block
        .replaceFirst(RegExp(r'.*\n'), '')
        .split('')
      ..removeWhere((e) => e == '' || e == '\n');

    for (final kanji in kanjiList.toSet()) {
      b.insert(
        'RADKFILE',
        {
          'radical': radical,
          'kanji': kanji,
        },
      );
    }
  }

  await b.commit();
}
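For context on the block handling above, the radkfile groups kanji under '$' headers, roughly as in the illustrative excerpt below; that is why block[1] is read as the radical and the first line is stripped before splitting into individual kanji.

// Illustrative radkfile block (abridged, not verbatim from data/radkfile_utf8):
//
//   $ 一 1
//   丁七万丈三上下不...
//
// After content.split(r'$') each block starts with ' 一 1\n...', so block[1]
// is the radical character and replaceFirst(RegExp(r'.*\n'), '') removes the
// header line, leaving only the kanji that contain that radical.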
bin/romaji_transliteration.dart (new file, 622 lines)
@@ -0,0 +1,622 @@
// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb

const hiragana_syllabic_n = 'ん';
const hiragana_small_tsu = 'っ';

const Map<String, String> hiragana_to_latin = {
  'あ': 'a',
  'い': 'i',
  'う': 'u',
  'え': 'e',
  'お': 'o',
  'か': 'ka',
  'き': 'ki',
  'く': 'ku',
  'け': 'ke',
  'こ': 'ko',
  'が': 'ga',
  'ぎ': 'gi',
  'ぐ': 'gu',
  'げ': 'ge',
  'ご': 'go',
  'さ': 'sa',
  'し': 'shi',
  'す': 'su',
  'せ': 'se',
  'そ': 'so',
  'ざ': 'za',
  'じ': 'ji',
  'ず': 'zu',
  'ぜ': 'ze',
  'ぞ': 'zo',
  'た': 'ta',
  'ち': 'chi',
  'つ': 'tsu',
  'て': 'te',
  'と': 'to',
  'だ': 'da',
  'ぢ': 'ji',
  'づ': 'zu',
  'で': 'de',
  'ど': 'do',
  'な': 'na',
  'に': 'ni',
  'ぬ': 'nu',
  'ね': 'ne',
  'の': 'no',
  'は': 'ha',
  'ひ': 'hi',
  'ふ': 'fu',
  'へ': 'he',
  'ほ': 'ho',
  'ば': 'ba',
  'び': 'bi',
  'ぶ': 'bu',
  'べ': 'be',
  'ぼ': 'bo',
  'ぱ': 'pa',
  'ぴ': 'pi',
  'ぷ': 'pu',
  'ぺ': 'pe',
  'ぽ': 'po',
  'ま': 'ma',
  'み': 'mi',
  'む': 'mu',
  'め': 'me',
  'も': 'mo',
  'や': 'ya',
  'ゆ': 'yu',
  'よ': 'yo',
  'ら': 'ra',
  'り': 'ri',
  'る': 'ru',
  'れ': 're',
  'ろ': 'ro',
  'わ': 'wa',
  'うぃ': 'whi',
  'うぇ': 'whe',
  'を': 'wo',
  'ゑ': 'we',
  'ゐ': 'wi',
  'ー': '-',
  'ん': 'n',
  'きゃ': 'kya',
  'きゅ': 'kyu',
  'きょ': 'kyo',
  'きぇ': 'kye',
  'きぃ': 'kyi',
  'ぎゃ': 'gya',
  'ぎゅ': 'gyu',
  'ぎょ': 'gyo',
  'ぎぇ': 'gye',
  'ぎぃ': 'gyi',
  'くぁ': 'kwa',
  'くぃ': 'kwi',
  'くぅ': 'kwu',
  'くぇ': 'kwe',
  'くぉ': 'kwo',
  'ぐぁ': 'qwa',
  'ぐぃ': 'gwi',
  'ぐぅ': 'gwu',
  'ぐぇ': 'gwe',
  'ぐぉ': 'gwo',
  'しゃ': 'sha',
  'しぃ': 'syi',
  'しゅ': 'shu',
  'しぇ': 'she',
  'しょ': 'sho',
  'じゃ': 'ja',
  'じゅ': 'ju',
  'じぇ': 'jye',
  'じょ': 'jo',
  'じぃ': 'jyi',
  'すぁ': 'swa',
  'すぃ': 'swi',
  'すぅ': 'swu',
  'すぇ': 'swe',
  'すぉ': 'swo',
  'ちゃ': 'cha',
  'ちゅ': 'chu',
  'ちぇ': 'tye',
  'ちょ': 'cho',
  'ちぃ': 'tyi',
  'ぢゃ': 'ja',
  'ぢぃ': 'dyi',
  'ぢゅ': 'ju',
  'ぢぇ': 'dye',
  'ぢょ': 'jo',
  'つぁ': 'tsa',
  'つぃ': 'tsi',
  'つぇ': 'tse',
  'つぉ': 'tso',
  'てゃ': 'tha',
  'てぃ': 'thi',
  'てゅ': 'thu',
  'てぇ': 'the',
  'てょ': 'tho',
  'とぁ': 'twa',
  'とぃ': 'twi',
  'とぅ': 'twu',
  'とぇ': 'twe',
  'とぉ': 'two',
  'でゃ': 'dha',
  'でぃ': 'dhi',
  'でゅ': 'dhu',
  'でぇ': 'dhe',
  'でょ': 'dho',
  'どぁ': 'dwa',
  'どぃ': 'dwi',
  'どぅ': 'dwu',
  'どぇ': 'dwe',
  'どぉ': 'dwo',
  'にゃ': 'nya',
  'にゅ': 'nyu',
  'にょ': 'nyo',
  'にぇ': 'nye',
  'にぃ': 'nyi',
  'ひゃ': 'hya',
  'ひぃ': 'hyi',
  'ひゅ': 'hyu',
  'ひぇ': 'hye',
  'ひょ': 'hyo',
  'びゃ': 'bya',
  'びぃ': 'byi',
  'びゅ': 'byu',
  'びぇ': 'bye',
  'びょ': 'byo',
  'ぴゃ': 'pya',
  'ぴぃ': 'pyi',
  'ぴゅ': 'pyu',
  'ぴぇ': 'pye',
  'ぴょ': 'pyo',
  'ふぁ': 'fwa',
  'ふぃ': 'fyi',
  'ふぇ': 'fye',
  'ふぉ': 'fwo',
  'ふぅ': 'fwu',
  'ふゃ': 'fya',
  'ふゅ': 'fyu',
  'ふょ': 'fyo',
  'みゃ': 'mya',
  'みぃ': 'myi',
  'みゅ': 'myu',
  'みぇ': 'mye',
  'みょ': 'myo',
  'りゃ': 'rya',
  'りぃ': 'ryi',
  'りゅ': 'ryu',
  'りぇ': 'rye',
  'りょ': 'ryo',
  'ゔぁ': 'va',
  'ゔぃ': 'vyi',
  'ゔ': 'vu',
  'ゔぇ': 'vye',
  'ゔぉ': 'vo',
  'ゔゃ': 'vya',
  'ゔゅ': 'vyu',
  'ゔょ': 'vyo',
  'うぁ': 'wha',
  'いぇ': 'ye',
  'うぉ': 'who',
  'ぁ': 'xa',
  'ぃ': 'xi',
  'ぅ': 'xu',
  'ぇ': 'xe',
  'ぉ': 'xo',
  'ゕ': 'xka',
  'ゖ': 'xke',
  'ゎ': 'xwa'
};

const Map<String, String> latin_to_hiragana = {
  'a': 'あ',
  'i': 'い',
  'u': 'う',
  'e': 'え',
  'o': 'お',
  'ka': 'か',
  'ki': 'き',
  'ku': 'く',
  'ke': 'け',
  'ko': 'こ',
  'ga': 'が',
  'gi': 'ぎ',
  'gu': 'ぐ',
  'ge': 'げ',
  'go': 'ご',
  'sa': 'さ',
  'si': 'し',
  'shi': 'し',
  'su': 'す',
  'se': 'せ',
  'so': 'そ',
  'za': 'ざ',
  'zi': 'じ',
  'ji': 'じ',
  'zu': 'ず',
  'ze': 'ぜ',
  'zo': 'ぞ',
  'ta': 'た',
  'ti': 'ち',
  'chi': 'ち',
  'tu': 'つ',
  'tsu': 'つ',
  'te': 'て',
  'to': 'と',
  'da': 'だ',
  'di': 'ぢ',
  'du': 'づ',
  'dzu': 'づ',
  'de': 'で',
  'do': 'ど',
  'na': 'な',
  'ni': 'に',
  'nu': 'ぬ',
  'ne': 'ね',
  'no': 'の',
  'ha': 'は',
  'hi': 'ひ',
  'hu': 'ふ',
  'fu': 'ふ',
  'he': 'へ',
  'ho': 'ほ',
  'ba': 'ば',
  'bi': 'び',
  'bu': 'ぶ',
  'be': 'べ',
  'bo': 'ぼ',
  'pa': 'ぱ',
  'pi': 'ぴ',
  'pu': 'ぷ',
  'pe': 'ぺ',
  'po': 'ぽ',
  'ma': 'ま',
  'mi': 'み',
  'mu': 'む',
  'me': 'め',
  'mo': 'も',
  'ya': 'や',
  'yu': 'ゆ',
  'yo': 'よ',
  'ra': 'ら',
  'ri': 'り',
  'ru': 'る',
  're': 'れ',
  'ro': 'ろ',
  'la': 'ら',
  'li': 'り',
  'lu': 'る',
  'le': 'れ',
  'lo': 'ろ',
  'wa': 'わ',
  'wi': 'うぃ',
  'we': 'うぇ',
  'wo': 'を',
  'wye': 'ゑ',
  'wyi': 'ゐ',
  '-': 'ー',
  'n': 'ん',
  'nn': 'ん',
  "n'": 'ん',
  'kya': 'きゃ',
  'kyu': 'きゅ',
  'kyo': 'きょ',
  'kye': 'きぇ',
  'kyi': 'きぃ',
  'gya': 'ぎゃ',
  'gyu': 'ぎゅ',
  'gyo': 'ぎょ',
  'gye': 'ぎぇ',
  'gyi': 'ぎぃ',
  'kwa': 'くぁ',
  'kwi': 'くぃ',
  'kwu': 'くぅ',
  'kwe': 'くぇ',
  'kwo': 'くぉ',
  'gwa': 'ぐぁ',
  'gwi': 'ぐぃ',
  'gwu': 'ぐぅ',
  'gwe': 'ぐぇ',
  'gwo': 'ぐぉ',
  'qwa': 'ぐぁ',
  'qwi': 'ぐぃ',
  'qwu': 'ぐぅ',
  'qwe': 'ぐぇ',
  'qwo': 'ぐぉ',
  'sya': 'しゃ',
  'syi': 'しぃ',
  'syu': 'しゅ',
  'sye': 'しぇ',
  'syo': 'しょ',
  'sha': 'しゃ',
  'shu': 'しゅ',
  'she': 'しぇ',
  'sho': 'しょ',
  'ja': 'じゃ',
  'ju': 'じゅ',
  'je': 'じぇ',
  'jo': 'じょ',
  'jya': 'じゃ',
  'jyi': 'じぃ',
  'jyu': 'じゅ',
  'jye': 'じぇ',
  'jyo': 'じょ',
  'zya': 'じゃ',
  'zyu': 'じゅ',
  'zyo': 'じょ',
  'zye': 'じぇ',
  'zyi': 'じぃ',
  'swa': 'すぁ',
  'swi': 'すぃ',
  'swu': 'すぅ',
  'swe': 'すぇ',
  'swo': 'すぉ',
  'cha': 'ちゃ',
  'chu': 'ちゅ',
  'che': 'ちぇ',
  'cho': 'ちょ',
  'cya': 'ちゃ',
  'cyi': 'ちぃ',
  'cyu': 'ちゅ',
  'cye': 'ちぇ',
  'cyo': 'ちょ',
  'tya': 'ちゃ',
  'tyi': 'ちぃ',
  'tyu': 'ちゅ',
  'tye': 'ちぇ',
  'tyo': 'ちょ',
  'dya': 'ぢゃ',
  'dyi': 'ぢぃ',
  'dyu': 'ぢゅ',
  'dye': 'ぢぇ',
  'dyo': 'ぢょ',
  'tsa': 'つぁ',
  'tsi': 'つぃ',
  'tse': 'つぇ',
  'tso': 'つぉ',
  'tha': 'てゃ',
  'thi': 'てぃ',
  'thu': 'てゅ',
  'the': 'てぇ',
  'tho': 'てょ',
  'twa': 'とぁ',
  'twi': 'とぃ',
  'twu': 'とぅ',
  'twe': 'とぇ',
  'two': 'とぉ',
  'dha': 'でゃ',
  'dhi': 'でぃ',
  'dhu': 'でゅ',
  'dhe': 'でぇ',
  'dho': 'でょ',
  'dwa': 'どぁ',
  'dwi': 'どぃ',
  'dwu': 'どぅ',
  'dwe': 'どぇ',
  'dwo': 'どぉ',
  'nya': 'にゃ',
  'nyu': 'にゅ',
  'nyo': 'にょ',
  'nye': 'にぇ',
  'nyi': 'にぃ',
  'hya': 'ひゃ',
  'hyi': 'ひぃ',
  'hyu': 'ひゅ',
  'hye': 'ひぇ',
  'hyo': 'ひょ',
  'bya': 'びゃ',
  'byi': 'びぃ',
  'byu': 'びゅ',
  'bye': 'びぇ',
  'byo': 'びょ',
  'pya': 'ぴゃ',
  'pyi': 'ぴぃ',
  'pyu': 'ぴゅ',
  'pye': 'ぴぇ',
  'pyo': 'ぴょ',
  'fa': 'ふぁ',
  'fi': 'ふぃ',
  'fe': 'ふぇ',
  'fo': 'ふぉ',
  'fwa': 'ふぁ',
  'fwi': 'ふぃ',
  'fwu': 'ふぅ',
  'fwe': 'ふぇ',
  'fwo': 'ふぉ',
  'fya': 'ふゃ',
  'fyi': 'ふぃ',
  'fyu': 'ふゅ',
  'fye': 'ふぇ',
  'fyo': 'ふょ',
  'mya': 'みゃ',
  'myi': 'みぃ',
  'myu': 'みゅ',
  'mye': 'みぇ',
  'myo': 'みょ',
  'rya': 'りゃ',
  'ryi': 'りぃ',
  'ryu': 'りゅ',
  'rye': 'りぇ',
  'ryo': 'りょ',
  'lya': 'りゃ',
  'lyu': 'りゅ',
  'lyo': 'りょ',
  'lye': 'りぇ',
  'lyi': 'りぃ',
  'va': 'ゔぁ',
  'vi': 'ゔぃ',
  'vu': 'ゔ',
  've': 'ゔぇ',
  'vo': 'ゔぉ',
  'vya': 'ゔゃ',
  'vyi': 'ゔぃ',
  'vyu': 'ゔゅ',
  'vye': 'ゔぇ',
  'vyo': 'ゔょ',
  'wha': 'うぁ',
  'whi': 'うぃ',
  'ye': 'いぇ',
  'whe': 'うぇ',
  'who': 'うぉ',
  'xa': 'ぁ',
  'xi': 'ぃ',
  'xu': 'ぅ',
  'xe': 'ぇ',
  'xo': 'ぉ',
  'xya': 'ゃ',
  'xyu': 'ゅ',
  'xyo': 'ょ',
  'xtu': 'っ',
  'xtsu': 'っ',
  'xka': 'ゕ',
  'xke': 'ゖ',
  'xwa': 'ゎ',
  '@@': ' ',
  '#[': '「',
  '#]': '」',
  '#,': '、',
  '#.': '。',
  '#/': '・',
};

bool _smallTsu(String for_conversion) => for_conversion == hiragana_small_tsu;
bool _nFollowedByYuYeYo(String for_conversion, String kana) =>
    for_conversion == hiragana_syllabic_n &&
    kana.length > 1 &&
    'やゆよ'.contains(kana.substring(1, 2));

String transliterateHiraganaToLatin(String hiragana) {
  String kana = hiragana;
  String romaji = '';
  bool geminate = false;

  while (kana.isNotEmpty) {
    final lengths = [if (kana.length > 1) 2, 1];
    for (final length in lengths) {
      final String for_conversion = kana.substring(0, length);
      String? mora;

      if (_smallTsu(for_conversion)) {
        geminate = true;
        kana = kana.replaceRange(0, length, '');
        break;
      } else if (_nFollowedByYuYeYo(for_conversion, kana)) {
        mora = "n'";
      }
      mora ??= hiragana_to_latin[for_conversion];

      if (mora != null) {
        if (geminate) {
          geminate = false;
          romaji += mora.substring(0, 1);
        }
        romaji += mora;
        kana = kana.replaceRange(0, length, '');
        break;
      } else if (length == 1) {
        romaji += for_conversion;
        kana = kana.replaceRange(0, length, '');
      }
    }
  }
  return romaji;
}

bool _doubleNFollowedByAIUEO(String for_conversion) =>
    RegExp(r'^nn[aiueo]$').hasMatch(for_conversion);
bool _hasTableMatch(String for_conversion) =>
    latin_to_hiragana[for_conversion] != null;
bool _hasDoubleConsonant(String for_conversion, int length) =>
    for_conversion == 'tch' ||
    (length == 2 &&
        RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(for_conversion));

String transliterateLatinToHiragana(String latin) {
  String romaji =
      latin.toLowerCase().replaceAll('mb', 'nb').replaceAll('mp', 'np');
  String kana = '';

  while (romaji.isNotEmpty) {
    final lengths = [
      if (romaji.length > 2) 3,
      if (romaji.length > 1) 2,
      1,
    ];

    for (final length in lengths) {
      String? mora;
      int for_removal = length;
      final String for_conversion = romaji.substring(0, length);

      if (_doubleNFollowedByAIUEO(for_conversion)) {
        mora = hiragana_syllabic_n;
        for_removal = 1;
      } else if (_hasTableMatch(for_conversion)) {
        mora = latin_to_hiragana[for_conversion];
      } else if (_hasDoubleConsonant(for_conversion, length)) {
        mora = hiragana_small_tsu;
        for_removal = 1;
      }

      if (mora != null) {
        kana += mora;
        romaji = romaji.replaceRange(0, for_removal, '');
        break;
      } else if (length == 1) {
        kana += for_conversion;
        romaji = romaji.replaceRange(0, 1, '');
      }
    }
  }

  return kana;
}

String _transposeCodepointsInRange(
  String text,
  int distance,
  int rangeStart,
  int rangeEnd,
) =>
    String.fromCharCodes(
      text.codeUnits
          .map((c) => c + ((rangeStart <= c && c <= rangeEnd) ? distance : 0)),
    );

String transliterateKanaToLatin(String kana) =>
    transliterateHiraganaToLatin(transliterateKatakanaToHiragana(kana));

String transliterateLatinToKatakana(String latin) =>
    transliterateHiraganaToKatakana(transliterateLatinToHiragana(latin));

String transliterateKatakanaToHiragana(String katakana) =>
    _transposeCodepointsInRange(katakana, -96, 12449, 12534);

String transliterateHiraganaToKatakana(String hiragana) =>
    _transposeCodepointsInRange(hiragana, 96, 12353, 12438);

String transliterateFullwidthRomajiToHalfwidth(String halfwidth) =>
    _transposeCodepointsInRange(
      _transposeCodepointsInRange(
        halfwidth,
        -65248,
        65281,
        65374,
      ),
      -12256,
      12288,
      12288,
    );

String transliterateHalfwidthRomajiToFullwidth(String halfwidth) =>
    _transposeCodepointsInRange(
      _transposeCodepointsInRange(
        halfwidth,
        65248,
        33,
        126,
      ),
      12256,
      32,
      32,
    );
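A short usage sketch of the helpers above; the expected outputs in the comments follow from the tables and code-point ranges in this file:

void main() {
  print(transliterateLatinToHiragana('kyouto'));  // きょうと
  print(transliterateLatinToHiragana("kin'iro")); // きんいろ (n' forces syllabic ん)
  print(transliterateHiraganaToKatakana('ねこ'));  // ネコ
  print(transliterateKanaToLatin('ニッポン'));     // nippon
}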