diff --git a/lib/_data_ingestion/jmdict/parser.dart b/lib/_data_ingestion/jmdict/parser.dart
index 7eb28b1..7c299c8 100644
--- a/lib/_data_ingestion/jmdict/parser.dart
+++ b/lib/_data_ingestion/jmdict/parser.dart
@@ -2,7 +2,7 @@ import 'dart:collection';
 import 'dart:io';
 
 import 'package:collection/collection.dart';
-import 'package:jadb/romaji_transliteration.dart';
+import 'package:jadb/util/romaji_transliteration.dart';
 import 'package:sqflite_common/sqlite_api.dart';
 import 'package:xml/xml.dart';
 
diff --git a/lib/util/datetime_extensions.dart b/lib/util/datetime_extensions.dart
new file mode 100644
index 0000000..351d0d2
--- /dev/null
+++ b/lib/util/datetime_extensions.dart
@@ -0,0 +1,326 @@
+import 'dart:core';
+
+// Run in console on http://www.kumamotokokufu-h.ed.jp/kumamoto/bungaku/nengoui.html
+// Array.from(document.querySelectorAll('table')[1].querySelectorAll('tr')).slice(1).map(row => `DateTime(${row.children[3 + (row.children.length == 7)].innerHTML.replace(/~.*/, '')}, ${row.children[4 + (row.children.length == 7)].innerHTML.replace('/', ', ')}): '${row.children[2 + (row.children.length == 7)].innerHTML}'`).join('\n')
+
+/// Start dates of the Japanese eras that precede the Nanboku-chō split.
+///
+/// Keys are `(year, month, day)` records in chronological order; values are
+/// the era names.
+const Map<(int, int, int), String> periodsBeforeNanbokuchou = {
+  (645, 6, 19): '大化',
+  (650, 2, 15): '白雉',
+  (686, 7, 20): '朱鳥',
+  (701, 3, 21): '大宝',
+  (704, 5, 10): '慶雲',
+  (708, 1, 11): '和銅',
+  (715, 9, 2): '霊亀',
+  (717, 11, 17): '養老',
+  (724, 2, 4): '神亀',
+  (729, 8, 5): '天平',
+  (749, 4, 14): '天平感宝',
+  (749, 7, 2): '天平勝宝',
+  (757, 8, 18): '天平宝字',
+  (765, 1, 7): '天平神護',
+  (767, 8, 16): '神護景雲',
+  (770, 10, 1): '宝亀',
+  (781, 1, 1): '天応',
+  (782, 8, 19): '延暦',
+  (806, 5, 18): '大同',
+  (810, 9, 19): '弘仁',
+  (824, 1, 5): '天長',
+  (834, 1, 3): '承和',
+  (848, 6, 13): '嘉祥',
+  (851, 4, 28): '仁寿',
+  (854, 11, 30): '斎衡',
+  (857, 2, 21): '天安',
+  (859, 4, 15): '貞観',
+  (877, 4, 16): '元慶',
+  (885, 2, 21): '仁和',
+  (889, 4, 27): '寛平',
+  (898, 4, 26): '昌泰',
+  (901, 7, 15): '延喜',
+  (923, 4, 11): '延長',
+  (931, 4, 26): '承平',
+  (938, 5, 22): '天慶',
+  (947, 4, 22): '天暦',
+  (957, 10, 27): '天徳',
+  (961, 2, 16): '応和',
+  (964, 7, 10): '康保',
+  (968, 8, 13): '安和',
+  (970, 3, 25): '天禄',
+  (973, 12, 20): '天延',
+  (976, 7, 13): '貞元',
+  (978, 11, 29): '天元',
+  (983, 4, 15): '永観',
+  (985, 4, 27): '寛和',
+  (987, 4, 5): '永延',
+  (989, 8, 8): '永祚',
+  (990, 11, 7): '正暦',
+  (995, 2, 22): '長徳',
+  (999, 1, 13): '長保',
+  (1004, 7, 20): '寛弘',
+  (1012, 12, 25): '長和',
+  (1017, 4, 23): '寛仁',
+  (1021, 2, 2): '治安',
+  (1024, 7, 13): '万寿',
+  (1028, 7, 25): '長元',
+  (1037, 4, 21): '長暦',
+  (1040, 11, 10): '長久',
+  (1044, 11, 24): '寛徳',
+  (1046, 4, 14): '永承',
+  (1053, 1, 11): '天喜',
+  (1058, 8, 29): '康平',
+  (1065, 8, 2): '治暦',
+  (1069, 4, 13): '延久',
+  (1074, 8, 23): '承保',
+  (1077, 11, 17): '承暦',
+  (1081, 2, 10): '永保',
+  (1084, 2, 7): '応徳',
+  (1087, 4, 7): '寛治',
+  (1094, 12, 15): '嘉保',
+  (1096, 12, 17): '永長',
+  (1097, 11, 21): '承徳',
+  (1099, 8, 28): '康和',
+  (1104, 2, 10): '長治',
+  (1106, 4, 9): '嘉承',
+  (1108, 8, 3): '天仁',
+  (1110, 7, 13): '天永',
+  (1113, 7, 13): '永久',
+  (1118, 4, 3): '元永',
+  (1120, 4, 10): '保安',
+  (1124, 4, 3): '天治',
+  (1126, 1, 22): '大治',
+  (1131, 1, 29): '天承',
+  (1132, 8, 11): '長承',
+  (1135, 4, 27): '保延',
+  (1141, 7, 10): '永治',
+  (1142, 4, 28): '康治',
+  (1144, 2, 23): '天養',
+  (1145, 7, 22): '久安',
+  (1151, 1, 26): '仁平',
+  (1154, 10, 28): '久寿',
+  (1156, 4, 27): '保元',
+  (1159, 4, 20): '平治',
+  (1160, 1, 10): '永暦',
+  (1161, 9, 4): '応保',
+  (1163, 3, 29): '長寛',
+  (1165, 6, 5): '永万',
+  (1166, 8, 27): '仁安',
+  (1169, 4, 8): '嘉応',
+  (1171, 4, 21): '承安',
+  (1175, 7, 28): '安元',
+  (1177, 8, 4): '治承',
+  (1181, 7, 14): '養和',
+  (1182, 5, 27): '寿永',
+  (1184, 4, 16): '元暦',
+  (1185, 8, 14): '文治',
+  (1190, 4, 11): '建久',
+  (1199, 4, 27): '正治',
+  (1201, 2, 13): '建仁',
+  (1204, 2, 20): '元久',
+  (1206, 4, 27): '建永',
+  (1207, 10, 25): '承元',
+  (1211, 3, 9): '建暦',
+  (1213, 12, 6): '建保',
+  (1219, 4, 12): '承久',
+  (1222, 4, 13): '貞応',
+  (1224, 11, 20): '元仁',
+  (1225, 4, 20): '嘉禄',
+  (1227, 12, 10): '安貞',
+  (1229, 3, 5): '寛喜',
+  (1232, 4, 2): '貞永',
+  (1233, 4, 15): '天福',
+  (1234, 11, 5): '文暦',
+  (1235, 9, 19): '嘉禎',
+  (1238, 11, 23): '暦仁',
+  (1239, 2, 7): '延応',
+  (1240, 7, 16): '仁治',
+  (1243, 2, 26): '寛元',
+  (1247, 2, 28): '宝治',
+  (1249, 3, 18): '建長',
+  (1256, 10, 5): '康元',
+  (1257, 3, 14): '正嘉',
+  (1259, 3, 26): '正元',
+  (1260, 4, 13): '文応',
+  (1261, 2, 20): '弘長',
+  (1264, 2, 28): '文永',
+  (1275, 4, 25): '建治',
+  (1278, 2, 29): '弘安',
+  (1288, 4, 28): '正応',
+  (1293, 8, 5): '永仁',
+  (1299, 4, 25): '正安',
+  (1302, 11, 21): '乾元',
+  (1303, 8, 5): '嘉元',
+  (1306, 12, 14): '徳治',
+  (1308, 10, 9): '延慶',
+  (1311, 4, 28): '応長',
+  (1312, 3, 20): '正和',
+  (1317, 2, 3): '文保',
+  (1319, 4, 28): '元応',
+  (1321, 2, 23): '元亨',
+  (1324, 12, 9): '正中',
+  (1326, 4, 26): '嘉暦',
+};
+
+/// Era start dates on the northern timeline of the Nanboku-chō period.
+///
+/// Same layout as [periodsBeforeNanbokuchou].
+const Map<(int, int, int), String> periodsNanbokuchouNorth = {
+  (1329, 8, 29): '元徳',
+  (1332, 4, 28): '正慶',
+  (1334, 1, 29): '建武',
+  (1338, 8, 28): '暦応',
+  (1342, 4, 27): '康永',
+  (1345, 10, 21): '貞和',
+  (1350, 2, 27): '観応',
+  (1352, 9, 27): '文和',
+  (1356, 3, 28): '延文',
+  (1361, 3, 29): '康安',
+  (1362, 9, 23): '貞治',
+  (1368, 2, 18): '応安',
+  (1375, 2, 27): '永和',
+  (1379, 3, 22): '康暦',
+  (1381, 2, 24): '永徳',
+  (1384, 2, 27): '至徳',
+  (1387, 8, 23): '嘉慶',
+  (1389, 2, 9): '康応',
+  (1390, 3, 26): '明徳',
+};
+
+/// Era start dates on the southern timeline of the Nanboku-chō period.
+///
+/// Same layout as [periodsBeforeNanbokuchou].
+const Map<(int, int, int), String> periodsNanbokuchouSouth = {
+  (1329, 8, 29): '元徳',
+  (1331, 8, 9): '元弘',
+  (1334, 1, 29): '建武',
+  (1336, 2, 29): '延元',
+  (1340, 4, 28): '興国',
+  (1346, 12, 8): '正平',
+  (1370, 7, 24): '建徳',
+  (1372, 4, 4): '文中',
+  (1375, 5, 27): '天授',
+  (1381, 2, 10): '弘和',
+  (1384, 4, 28): '元中',
+};
+
+/// Era start dates from the end of the Nanboku-chō period up to 令和.
+///
+/// Same layout as [periodsBeforeNanbokuchou].
+///
+/// NOTE(review): some keys, e.g. `(1504, 2, 30)`, are not valid Gregorian
+/// dates, so the table presumably follows the traditional lunisolar calendar.
+/// Confirm before comparing keys against a [DateTime].
+const Map<(int, int, int), String> periodsAfterNanbokuchou = {
+  // (1392, ): '室町時代',
+  (1394, 7, 5): '応永',
+  (1428, 4, 27): '正長',
+  (1429, 9, 5): '永享',
+  (1441, 2, 17): '嘉吉',
+  (1444, 2, 5): '文安',
+  (1449, 7, 28): '宝徳',
+  (1452, 7, 25): '享徳',
+  (1455, 7, 25): '康正',
+  (1457, 9, 28): '長禄',
+  (1460, 12, 21): '寛正',
+  (1466, 2, 28): '文正',
+  (1467, 3, 5): '応仁',
+  (1469, 4, 28): '文明',
+  (1487, 7, 20): '長享',
+  (1489, 8, 21): '延徳',
+  (1492, 7, 19): '明応',
+  (1501, 2, 29): '文亀',
+  (1504, 2, 30): '永正',
+  (1521, 8, 23): '大永',
+  (1528, 8, 20): '享禄',
+  (1532, 7, 29): '天文',
+  (1555, 10, 23): '弘治',
+  (1558, 2, 28): '永禄',
+  (1570, 4, 23): '元亀',
+  (1573, 7, 28): '天正',
+  (1592, 12, 8): '文禄',
+  (1596, 10, 27): '慶長',
+  (1615, 7, 13): '元和',
+  (1624, 2, 30): '寛永',
+  (1644, 12, 16): '正保',
+  (1648, 2, 15): '慶安',
+  (1652, 9, 18): '承応',
+  (1655, 4, 13): '明暦',
+  (1658, 7, 23): '万治',
+  (1661, 4, 25): '寛文',
+  (1673, 9, 21): '延宝',
+  (1681, 9, 29): '天和',
+  (1684, 2, 21): '貞享',
+  (1688, 9, 30): '元禄',
+  (1704, 3, 13): '宝永',
+  (1711, 4, 25): '正徳',
+  (1716, 6, 22): '享保',
+  (1736, 4, 28): '元文',
+  (1741, 2, 27): '寛保',
+  (1744, 2, 21): '延享',
+  (1748, 7, 12): '寛延',
+  (1751, 10, 27): '宝暦',
+  (1764, 6, 2): '明和',
+  (1772, 11, 16): '安永',
+  (1781, 4, 2): '天明',
+  (1789, 1, 25): '寛政',
+  (1801, 2, 5): '享和',
+  (1804, 2, 11): '文化',
+  (1818, 4, 22): '文政',
+  (1830, 12, 10): '天保',
+  (1844, 12, 2): '弘化',
+  (1848, 2, 28): '嘉永',
+  (1854, 11, 27): '安政',
+  (1860, 3, 18): '万延',
+  (1861, 2, 19): '文久',
+  (1864, 2, 20): '元治',
+  (1865, 4, 7): '慶応',
+  (1868, 9, 8): '明治',
+  (1912, 7, 30): '大正',
+  (1926, 12, 25): '昭和',
+  (1989, 1, 8): '平成',
+  (2019, 5, 1): '令和',
+};
+
+/// Japanese-style formatting helpers for [DateTime].
+///
+/// The extension is named so that it can be used from other libraries; an
+/// unnamed extension is only visible inside the library that declares it.
+extension JapaneseDateTime on DateTime {
+  /// Intended to return the name of the Japanese era this date falls in.
+  ///
+  /// Not implemented yet: every call currently throws [UnimplementedError].
+  ///
+  /// Note: In the years between 1336 and 1392, Japan was split in two
+  /// because of an ongoing conflict. As a result, Japan has two timelines
+  /// during these years. Unless you turn off [nanbokuchouPeriodUsesNorth],
+  /// this function will give you the timeline from the northern part of
+  /// Japan as a default.
+  ///
+  /// See more info here:
+  /// - https://en.wikipedia.org/wiki/Nanboku-ch%C5%8D_period
+  /// - http://www.kumamotokokufu-h.ed.jp/kumamoto/bungaku/nengoui.html
+  String? japaneseEra({bool nanbokuchouPeriodUsesNorth = true}) {
+    // TODO: implement the lookup. Planned approach:
+    //  - return null when `year < 645`, i.e. before the first known era;
+    //  - when `year < periodsNanbokuchouNorth.keys.first.$1`, find first
+    //    where year <= this.year and jump one period back.
+    throw UnimplementedError('This function is not implemented yet.');
+  }
+
+  /// The one-kanji abbreviation of this date's weekday.
+  ///
+  /// Runs from '月' for [DateTime.monday] to '日' for [DateTime.sunday].
+  String get japaneseWeekdayPrefix =>
+      const ['月', '火', '水', '木', '金', '土', '日'][weekday - 1];
+
+  /// Returns this date formatted as `M月D日`; the year is not included.
+  ///
+  /// When [showWeekday] is true, the weekday kanji is appended in
+  /// parentheses, e.g. `1月1日(月)`.
+  String japaneseDate({bool showWeekday = false}) {
+    final weekdaySuffix = showWeekday ? '($japaneseWeekdayPrefix)' : '';
+    return '$month月$day日$weekdaySuffix';
+  }
+}
diff --git a/lib/jouyou_kanji.dart b/lib/util/jouyou_kanji.dart
similarity index 100%
rename from lib/jouyou_kanji.dart
rename to lib/util/jouyou_kanji.dart
diff --git a/lib/util/number_transliteration.dart b/lib/util/number_transliteration.dart
new file mode 100644
index 0000000..05d618b
--- /dev/null
+++ b/lib/util/number_transliteration.dart
@@ -0,0 +1,76 @@
+/// Kanji for zero, the digits 1-9 and the powers of ten that have their own
+/// character, keyed by the numeric value they stand for.
+const Map<int, String> numberToKanjiMap = {
+  0: '零',
+  1: '一',
+  2: '二',
+  3: '三',
+  4: '四',
+  5: '五',
+  6: '六',
+  7: '七',
+  8: '八',
+  9: '九',
+  10: '十',
+  100: '百',
+  1000: '千',
+  10000: '万',
+  100000000: '億',
+  1000000000000: '兆',
+};
+
+/// Kanji for the larger powers of ten, from 10^16 (京) upwards.
+///
+/// Unlike [numberToKanjiMap], the keys here are exponents of ten rather
+/// than the values themselves: `16` stands for 10^16, `20` for 10^20, etc.
+const Map<int, String> extendedNumberToKanjiMap = {
+  16: '京',
+  20: '垓',
+  24: '𥝱',
+  28: '穣',
+  32: '溝',
+  36: '澗',
+  40: '正',
+  44: '載',
+  48: '極',
+};
+
+/// Formal (大字) variants of the numerals in [numberToKanjiMap], keyed by
+/// numeric value.
+const Map<int, String> numberToFormalKanjiMap = {
+  0: '零',
+  1: '壱',
+  2: '弐',
+  3: '参',
+  4: '肆',
+  5: '伍',
+  6: '陸',
+  7: '漆',
+  8: '捌',
+  9: '玖',
+  10: '拾',
+  100: '陌',
+  1000: '阡',
+  10000: '萬',
+};
+
+/// The kanji used as the yen currency suffix.
+const String yenSymbol = '円';
+
+/// The formal (old-form) kanji used as the yen currency suffix.
+const String formalYenSymbol = '圓';
+
+/// Intended to convert [number] to kanji numerals, presumably using the
+/// characters from [numberToFormalKanjiMap] when [formal] is true.
+///
+/// Not implemented yet: every call currently throws [UnimplementedError].
+String numberToKanji(int number, {bool formal = false}) {
+  throw UnimplementedError();
+}
+
+/// Intended to parse a kanji numeral string back into an integer.
+///
+/// Not implemented yet: every call currently throws [UnimplementedError].
+int kanjiToNumber(String kanji) {
+  throw UnimplementedError();
+}
diff --git a/lib/romaji_transliteration.dart b/lib/util/romaji_transliteration.dart
similarity index 100%
rename from lib/romaji_transliteration.dart
rename to lib/util/romaji_transliteration.dart
diff --git a/lib/util/text_filtering.dart b/lib/util/text_filtering.dart
new file mode 100644
index 0000000..d42b1d7
--- /dev/null
+++ b/lib/util/text_filtering.dart
@@ -0,0 +1,42 @@
+/// The string version of a regex that will match any Unified CJK Character.
+///
+/// This covers every code point whose Unicode script is Han, including the
+/// CJK Unified Ideographs block (U+4E00-U+9FFF) and its extension blocks.
+///
+/// See https://www.regular-expressions.info/unicode.html
+///
+/// Remember to turn on the unicode flag when making a new RegExp.
+const String rawKanjiRegex = r'\p{Script=Hani}';
+
+/// The string version of a regex that will match any katakana.
+///
+/// This covers every code point whose Unicode script is Katakana, including
+/// the letters of the Katakana block (U+30A0-U+30FF).
+///
+/// NOTE(review): the prolonged sound mark 'ー' (U+30FC) is assigned the
+/// script Common in Unicode, so it is presumably not matched - confirm
+/// whether callers need it.
+///
+/// See https://www.regular-expressions.info/unicode.html
+///
+/// Remember to turn on the unicode flag when making a new RegExp.
+const String rawKatakanaRegex = r'\p{Script=Katakana}';
+
+/// The string version of a regex that will match any hiragana.
+///
+/// This covers every code point whose Unicode script is Hiragana, including
+/// the letters of the Hiragana block (U+3040-U+309F).
+///
+/// See https://www.regular-expressions.info/unicode.html
+///
+/// Remember to turn on the unicode flag when making a new RegExp.
+const String rawHiraganaRegex = r'\p{Script=Hiragana}';
+
+/// Ready-made unicode-mode [RegExp] built from [rawKanjiRegex].
+final RegExp kanjiRegex = RegExp(rawKanjiRegex, unicode: true);
+
+/// Ready-made unicode-mode [RegExp] built from [rawKatakanaRegex].
+final RegExp katakanaRegex = RegExp(rawKatakanaRegex, unicode: true);
+
+/// Ready-made unicode-mode [RegExp] built from [rawHiraganaRegex].
+final RegExp hiraganaRegex = RegExp(rawHiraganaRegex, unicode: true);
diff --git a/test/jouyou_kanji_test.dart b/test/jouyou_kanji_test.dart
index 01b9b30..95d2b85 100644
--- a/test/jouyou_kanji_test.dart
+++ b/test/jouyou_kanji_test.dart
@@ -1,5 +1,5 @@
 import 'package:collection/collection.dart';
-import 'package:jadb/jouyou_kanji.dart';
+import 'package:jadb/util/jouyou_kanji.dart';
 import 'package:test/test.dart';
 
 void main() {
diff --git a/test/search/kanji_search_test.dart b/test/search/kanji_search_test.dart
index 2a42d32..5c6ddaf 100644
--- a/test/search/kanji_search_test.dart
+++ b/test/search/kanji_search_test.dart
@@ -1,4 +1,4 @@
-import 'package:jadb/jouyou_kanji.dart';
+import 'package:jadb/util/jouyou_kanji.dart';
 import 'package:test/test.dart';
 
 import 'setup_database_connection.dart';
diff --git a/test/romaji_transliteration_test.dart b/test/util/romaji_transliteration_test.dart
similarity index 96%
rename from test/romaji_transliteration_test.dart
rename to test/util/romaji_transliteration_test.dart
index 39d6582..f4177aa 100644
--- a/test/romaji_transliteration_test.dart
+++ b/test/util/romaji_transliteration_test.dart
@@ -1,4 +1,4 @@
-import 'package:jadb/romaji_transliteration.dart';
+import 'package:jadb/util/romaji_transliteration.dart';
 import 'package:test/test.dart';
 
 void main() {