util/romaji_transliteration: add functions to generate transliteration spans
All checks were successful
Build and test / evals (push) Successful in 18m58s

This commit is contained in:
2026-03-02 18:21:06 +09:00
parent d14e3909d4
commit a86f857553
2 changed files with 154 additions and 0 deletions

View File

@@ -487,6 +487,7 @@ bool _nFollowedByYuYeYo(String forConversion, String kana) =>
kana.length > 1 &&
'やゆよ'.contains(kana.substring(1, 2));
/// Transliterates a string of hiragana characters to Latin script (romaji).
String transliterateHiraganaToLatin(String hiragana) {
String kana = hiragana;
String romaji = '';
@@ -524,6 +525,51 @@ String transliterateHiraganaToLatin(String hiragana) {
return romaji;
}
/// Returns alignment points between [hiragana] and its romaji transliteration.
///
/// Each record is `(kanaIndex, romajiIndex)`: the UTF-16 offset in [hiragana]
/// at which a converted chunk starts, paired with the offset in the generated
/// romaji at which that chunk's output starts. One record is emitted per
/// converted chunk (a one- or two-character table match, the `n'` special
/// case, or a single character copied through unchanged because it has no
/// table entry).
///
/// A small tsu produces no record of its own. It is consumed from the input
/// (advancing the input offset) and causes the first letter of the next
/// converted mora to be doubled; the record for that mora points at the
/// romaji offset *after* the doubled letter.
///
/// Returns an empty list for an empty input.
List<(int, int)> transliterateHiraganaToLatinSpan(String hiragana) {
  final List<(int, int)> spans = [];
  String kana = hiragana;
  // Offset of the start of `kana` within the original input.
  int kanaIndex = 0;
  // Length of the romaji produced so far; only the length is needed to build
  // the spans, so the romaji text itself is not materialised.
  int romajiLength = 0;
  // Set when a small tsu has been seen and the next mora must be geminated.
  bool geminate = false;

  while (kana.isNotEmpty) {
    // Try the longest candidate first so digraphs win over single kana.
    final lengths = [if (kana.length > 1) 2, 1];
    for (final length in lengths) {
      final String forConversion = kana.substring(0, length);
      String? mora;
      if (_smallTsu(forConversion)) {
        geminate = true;
        kana = kana.substring(length);
        // The small tsu occupies input positions too: advance the offset so
        // that every later span still points at the right kana.
        kanaIndex += length;
        break;
      } else if (_nFollowedByYuYeYo(forConversion, kana)) {
        mora = "n'";
      }
      mora ??= hiraganaToLatin[forConversion];
      if (mora != null) {
        if (geminate) {
          geminate = false;
          // The doubled leading letter of the mora takes one output position.
          romajiLength += 1;
        }
        spans.add((kanaIndex, romajiLength));
        romajiLength += mora.length;
        kana = kana.substring(length);
        kanaIndex += length;
        break;
      } else if (length == 1) {
        // No table entry even for a single character: copy it through as-is.
        spans.add((kanaIndex, romajiLength));
        romajiLength += forConversion.length;
        kana = kana.substring(length);
        kanaIndex += length;
      }
    }
  }
  return spans;
}
/// Whether [forConversion] is exactly `nn` followed by one of the vowels
/// `a`, `i`, `u`, `e` or `o` (three characters in total, lower case only).
bool _doubleNFollowedByAIUEO(String forConversion) {
  if (forConversion.length != 3) {
    return false;
  }
  if (!forConversion.startsWith('nn')) {
    return false;
  }
  return 'aiueo'.contains(forConversion[2]);
}
bool _hasTableMatch(String forConversion) =>
@@ -533,6 +579,7 @@ bool _hasDoubleConsonant(String forConversion, int length) =>
(length == 2 &&
RegExp(r'^([kgsztdnbpmyrlwchf])\1$').hasMatch(forConversion));
/// Transliterates a string of Latin script (romaji) to hiragana characters.
String transliterateLatinToHiragana(String latin) {
String romaji = latin
.toLowerCase()
@@ -572,6 +619,53 @@ String transliterateLatinToHiragana(String latin) {
return kana;
}
/// Computes alignment points between a romaji string and its hiragana
/// transliteration.
///
/// Each record is `(latinIndex, kanaIndex)`: the UTF-16 offset at which a
/// converted chunk starts in the normalised input, paired with the offset in
/// the generated hiragana where that chunk's output starts. The input is
/// normalised by lower-casing it and rewriting `mb`/`mp` to `nb`/`np` before
/// scanning, so the input offsets refer to that normalised form.
///
/// Chunks of up to three characters are tried, longest first. A chunk with no
/// conversion at length one is copied through unchanged and still gets a
/// record. Returns an empty list for an empty input.
List<(int, int)> transliterateLatinToHiraganaSpan(String latin) {
  final String normalized = latin
      .toLowerCase()
      .replaceAll('mb', 'nb')
      .replaceAll('mp', 'np');
  final alignment = <(int, int)>[];
  // Read position in `normalized`; doubles as the input offset of the span.
  int cursor = 0;
  // Length of the hiragana produced so far.
  int kanaLength = 0;

  while (cursor < normalized.length) {
    final int remaining = normalized.length - cursor;
    final candidateWidths = [if (remaining > 2) 3, if (remaining > 1) 2, 1];
    for (final width in candidateWidths) {
      final String chunk = normalized.substring(cursor, cursor + width);
      String? mora;
      // How many input characters this match uses up; the two lookahead
      // cases below only consume their first character.
      int consumed = width;
      if (_doubleNFollowedByAIUEO(chunk)) {
        mora = hiraganaSyllabicN;
        consumed = 1;
      } else if (_hasTableMatch(chunk)) {
        mora = latinToHiragana[chunk];
      } else if (_hasDoubleConsonant(chunk, width)) {
        mora = hiraganaSmallTsu;
        consumed = 1;
      }

      if (mora != null) {
        alignment.add((cursor, kanaLength));
        kanaLength += mora.length;
        cursor += consumed;
        break;
      }
      if (width == 1) {
        // Nothing matched even a single character: pass it through verbatim.
        alignment.add((cursor, kanaLength));
        kanaLength += chunk.length;
        cursor += 1;
      }
    }
  }
  return alignment;
}
String _transposeCodepointsInRange(
String text,
int distance,
@@ -583,15 +677,19 @@ String _transposeCodepointsInRange(
),
);
/// Converts kana text (hiragana or katakana) to Latin script (romaji).
///
/// The input is first passed through [transliterateKatakanaToHiragana] and
/// the result is then romanised with [transliterateHiraganaToLatin].
String transliterateKanaToLatin(String kana) {
  final String asHiragana = transliterateKatakanaToHiragana(kana);
  return transliterateHiraganaToLatin(asHiragana);
}
/// Converts Latin script (romaji) to katakana.
///
/// The input is first converted with [transliterateLatinToHiragana] and the
/// result is then passed through [transliterateHiraganaToKatakana].
String transliterateLatinToKatakana(String latin) {
  final String asHiragana = transliterateLatinToHiragana(latin);
  return transliterateHiraganaToKatakana(asHiragana);
}
/// Converts katakana characters in [katakana] to hiragana.
///
/// Delegates to [_transposeCodepointsInRange] with a distance of -96 and the
/// range bounds U+30A1 and U+30F6.
String transliterateKatakanaToHiragana(String katakana) {
  const int distance = -96;
  const int rangeStart = 0x30A1; // 12449
  const int rangeEnd = 0x30F6; // 12534
  return _transposeCodepointsInRange(katakana, distance, rangeStart, rangeEnd);
}
/// Converts hiragana characters in [hiragana] to katakana.
///
/// Delegates to [_transposeCodepointsInRange] with a distance of 96 and the
/// range bounds U+3041 and U+3096.
String transliterateHiraganaToKatakana(String hiragana) {
  const int distance = 96;
  const int rangeStart = 0x3041; // 12353
  const int rangeEnd = 0x3096; // 12438
  return _transposeCodepointsInRange(hiragana, distance, rangeStart, rangeEnd);
}