Add basic kana transliteration functions
Build and test / check (push) Failing after 44s
Build and test / build-dynamic-library (push) Successful in 49s
Build and test / build-static-library (push) Successful in 1m36s
Build and test / test (push) Successful in 54s

This commit is contained in:
2026-05-07 16:32:01 +09:00
parent 6fc475de1a
commit 0d21cfa4a1
2 changed files with 524 additions and 43 deletions
+381
View File
@@ -0,0 +1,381 @@
/// Shifts every character of `text` whose code point lies in
/// `range_start..=range_end` by `distance` code points.
///
/// Characters outside the range are copied unchanged. A character is also
/// left unchanged when the shifted value overflows `u32` or is not a valid
/// Unicode scalar value (for example, when it lands in the surrogate range),
/// so this function never panics in release builds.
///
/// The `debug_assert!`s document the expected preconditions: the range is
/// well-formed and shifting either end of it stays within `u32`.
fn transpose_codepoints_in_range(
    text: &str,
    distance: i32,
    range_start: u32,
    range_end: u32,
) -> String {
    debug_assert!(range_start <= range_end);
    debug_assert!(i64::from(range_start) + i64::from(distance) >= 0);
    debug_assert!(i64::from(range_end) + i64::from(distance) <= i64::from(u32::MAX));

    let shifted_range = range_start..=range_end;
    text.chars()
        .map(|c| {
            let codepoint = u32::from(c);
            if !shifted_range.contains(&codepoint) {
                return c;
            }
            // `checked_add_signed` yields `None` on overflow instead of
            // panicking; `char::from_u32` rejects invalid scalar values.
            codepoint
                .checked_add_signed(distance)
                .and_then(char::from_u32)
                .unwrap_or(c)
        })
        .collect()
}
// See charts:
// - Basic Latin: https://www.unicode.org/charts/PDF/U0000.pdf
// - Hiragana: https://www.unicode.org/charts/PDF/U3040.pdf
// - Katakana: https://www.unicode.org/charts/PDF/U30A0.pdf
// - Halfwidth characters: https://www.unicode.org/charts/PDF/UFF00.pdf
/// Converts fullwidth katakana to hiragana.
///
/// Code points U+30A1..=U+30F6 are moved down by 0x60 onto U+3041..=U+3096;
/// every other character is returned unchanged.
pub fn transliterate_katakana_to_hiragana(katakana: &str) -> String {
    let (first_katakana, last_katakana) = (0x30A1, 0x30F6);
    transpose_codepoints_in_range(katakana, -0x60, first_katakana, last_katakana)
}
/// Converts hiragana to fullwidth katakana.
///
/// Code points U+3041..=U+3096 are moved up by 0x60 onto U+30A1..=U+30F6;
/// every other character is returned unchanged.
pub fn transliterate_hiragana_to_katakana(hiragana: &str) -> String {
    let (first_hiragana, last_hiragana) = (0x3041, 0x3096);
    transpose_codepoints_in_range(hiragana, 0x60, first_hiragana, last_hiragana)
}
/// Converts fullwidth ASCII variants to their ordinary (halfwidth) ASCII
/// counterparts.
///
/// Two passes are applied:
/// 1. U+FF01..=U+FF5E (fullwidth `!` through `~`) is moved down by 0xFEE0
///    onto U+0021..=U+007E.
/// 2. U+3000 (ideographic space) is moved down by 0x2FE0 onto U+0020.
///
/// Every other character is returned unchanged.
pub fn transliterate_fullwidth_romaji_to_halfwidth_romaji(fullwidth: &str) -> String {
    let printable_narrowed = transpose_codepoints_in_range(fullwidth, -0xFEE0, 0xFF01, 0xFF5E);
    transpose_codepoints_in_range(&printable_narrowed, -0x2FE0, 0x3000, 0x3000)
}
/// Converts printable ASCII to its fullwidth variants.
///
/// Two passes are applied:
/// 1. U+0021..=U+007E (`!` through `~`) is moved up by 0xFEE0 onto
///    U+FF01..=U+FF5E.
/// 2. U+0020 (space) is moved up by 0x2FE0 onto U+3000 (ideographic space).
///
/// Every other character is returned unchanged.
pub fn transliterate_halfwidth_romaji_to_fullwidth_romaji(halfwidth: &str) -> String {
    let printable_widened = transpose_codepoints_in_range(halfwidth, 0xFEE0, 0x21, 0x7E);
    transpose_codepoints_in_range(&printable_widened, 0x2FE0, 0x20, 0x20)
}
pub fn transliterate_halfwidth_katakana_to_fullwidth_katakana(halfwidth: &str) -> String {
let mut result = String::with_capacity(halfwidth.len());
let mut chars = halfwidth.chars().peekable();
while let Some(c) = chars.next() {
let out: &[char] = match c {
'ヲ' => &['ヲ'],
'ァ' => &['ァ'],
'ィ' => &['ィ'],
'ゥ' => &['ゥ'],
'ェ' => &['ェ'],
'ォ' => &['ォ'],
'ャ' => &['ャ'],
'ュ' => &['ュ'],
'ョ' => &['ョ'],
'ッ' => &['ッ'],
'ー' => &['ー'],
'ア' => &['ア'],
'イ' => &['イ'],
'ウ' => &['ウ'],
'エ' => &['エ'],
'オ' => &['オ'],
'カ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ガ']
}
'カ' => &['カ'],
'キ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ギ']
}
'キ' => &['キ'],
'ク' if chars.peek() == Some(&'゙') => {
chars.next();
&['グ']
}
'ク' => &['ク'],
'ケ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ゲ']
}
'ケ' => &['ケ'],
'コ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ゴ']
}
'コ' => &['コ'],
'サ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ザ']
}
'サ' => &['サ'],
'シ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ジ']
}
'シ' => &['シ'],
'ス' if chars.peek() == Some(&'゙') => {
chars.next();
&['ズ']
}
'ス' => &['ス'],
'セ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ゼ']
}
'セ' => &['セ'],
'ソ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ゾ']
}
'ソ' => &['ソ'],
'タ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ダ']
}
'タ' => &['タ'],
'チ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ヂ']
}
'チ' => &['チ'],
'ツ' if chars.peek() == Some(&'゙') => {
chars.next();
&['ヅ']
}
'ツ' => &['ツ'],
'テ' if chars.peek() == Some(&'゙') => {
chars.next();
&['デ']
}
'テ' => &['テ'],
'ト' if chars.peek() == Some(&'゙') => {
chars.next();
&['ド']
}
'ト' => &['ト'],
'ナ' => &['ナ'],
'ニ' => &['ニ'],
'ヌ' => &['ヌ'],
'ネ' => &['ネ'],
'ノ' => &[''],
'ハ' => match chars.peek() {
Some(&'゙') => {
chars.next();
&['バ']
}
Some(&'゚') => {
chars.next();
&['パ']
}
_ => &['ハ'],
},
'ヒ' => match chars.peek() {
Some(&'゙') => {
chars.next();
&['ビ']
}
Some(&'゚') => {
chars.next();
&['ピ']
}
_ => &['ヒ'],
},
'フ' => match chars.peek() {
Some(&'゙') => {
chars.next();
&['ブ']
}
Some(&'゚') => {
chars.next();
&['プ']
}
_ => &['フ'],
},
'ヘ' => match chars.peek() {
Some(&'゙') => {
chars.next();
&['ベ']
}
Some(&'゚') => {
chars.next();
&['ペ']
}
_ => &['ヘ'],
},
'ホ' => match chars.peek() {
Some(&'゙') => {
chars.next();
&['ボ']
}
Some(&'゚') => {
chars.next();
&['ポ']
}
_ => &['ホ'],
},
'マ' => &['マ'],
'ミ' => &['ミ'],
'ム' => &['ム'],
'メ' => &['メ'],
'モ' => &['モ'],
'ヤ' => &['ヤ'],
'ユ' => &['ユ'],
'ヨ' => &['ヨ'],
'ラ' => &['ラ'],
'リ' => &['リ'],
'ル' => &['ル'],
'レ' => &['レ'],
'ロ' => &['ロ'],
'ワ' => &['ワ'],
'ン' => &['ン'],
_ => &[c],
};
result.push_str(out.iter().collect::<String>().as_str());
}
return result;
}
pub fn transliterate_fullwidth_katakana_to_halfwidth_katakana(fullwidth: &str) -> String {
let mut result = String::with_capacity(fullwidth.len() * 2);
for c in fullwidth.chars() {
let out: &[char] = match c {
'ヲ' => &['ヲ'],
'ァ' => &['ァ'],
'ィ' => &['ィ'],
'ゥ' => &['ゥ'],
'ェ' => &['ェ'],
'ォ' => &['ォ'],
'ャ' => &['ャ'],
'ュ' => &['ュ'],
'ョ' => &['ョ'],
'ッ' => &['ッ'],
'ー' => &['ー'],
'ア' => &['ア'],
'イ' => &['イ'],
'ウ' => &['ウ'],
'エ' => &['エ'],
'オ' => &['オ'],
'カ' => &['カ'],
'キ' => &['キ'],
'ク' => &['ク'],
'ケ' => &['ケ'],
'コ' => &['コ'],
'サ' => &['サ'],
'シ' => &['シ'],
'ス' => &['ス'],
'セ' => &['セ'],
'ソ' => &['ソ'],
'タ' => &['タ'],
'チ' => &['チ'],
'ツ' => &['ツ'],
'テ' => &['テ'],
'ト' => &['ト'],
'ナ' => &['ナ'],
'ニ' => &['ニ'],
'ヌ' => &['ヌ'],
'ネ' => &['ネ'],
'' => &['ノ'],
'ハ' => &['ハ'],
'ヒ' => &['ヒ'],
'フ' => &['フ'],
'ヘ' => &['ヘ'],
'ホ' => &['ホ'],
'マ' => &['マ'],
'ミ' => &['ミ'],
'ム' => &['ム'],
'メ' => &['メ'],
'モ' => &['モ'],
'ヤ' => &['ヤ'],
'ユ' => &['ユ'],
'ヨ' => &['ヨ'],
'ラ' => &['ラ'],
'リ' => &['リ'],
'ル' => &['ル'],
'レ' => &['レ'],
'ロ' => &['ロ'],
'ワ' => &['ワ'],
'ン' => &['ン'],
'ガ' => &['カ', '゙'],
'ギ' => &['キ', '゙'],
'グ' => &['ク', '゙'],
'ゲ' => &['ケ', '゙'],
'ゴ' => &['コ', '゙'],
'ザ' => &['サ', '゙'],
'ジ' => &['シ', '゙'],
'ズ' => &['ス', '゙'],
'ゼ' => &['セ', '゙'],
'ゾ' => &['ソ', '゙'],
'ダ' => &['タ', '゙'],
'ヂ' => &['チ', '゙'],
'ヅ' => &['ツ', '゙'],
'デ' => &['テ', '゙'],
'ド' => &['ト', '゙'],
'バ' => &['ハ', '゙'],
'ビ' => &['ヒ', '゙'],
'ブ' => &['フ', '゙'],
'ベ' => &['ヘ', '゙'],
'ボ' => &['ホ', '゙'],
'パ' => &['ハ', '゚'],
'ピ' => &['ヒ', '゚'],
'プ' => &['フ', '゚'],
'ペ' => &['ヘ', '゚'],
'ポ' => &['ホ', '゚'],
_ => &[c],
};
result.push_str(out.iter().collect::<String>().as_str());
}
return result;
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fullwidth "ABC123" (U+FF21..U+FF23, U+FF11..U+FF13). Written with
    // escapes so the fullwidth forms cannot be silently normalised to ASCII,
    // which would make the romaji tests meaningless.
    const FULLWIDTH_ABC123: &str = "\u{FF21}\u{FF22}\u{FF23}\u{FF11}\u{FF12}\u{FF13}";
    const HALFWIDTH_ABC123: &str = "ABC123";

    #[test]
    fn test_katakana_to_hiragana() {
        assert_eq!(transliterate_katakana_to_hiragana("カタカナ"), "かたかな");
    }

    #[test]
    fn test_hiragana_to_katakana() {
        assert_eq!(transliterate_hiragana_to_katakana("ひらがな"), "ヒラガナ");
    }

    #[test]
    fn test_fullwidth_romaji_to_halfwidth_romaji() {
        assert_eq!(
            transliterate_fullwidth_romaji_to_halfwidth_romaji(FULLWIDTH_ABC123),
            HALFWIDTH_ABC123
        );
    }

    #[test]
    fn test_halfwidth_romaji_to_fullwidth_romaji() {
        assert_eq!(
            transliterate_halfwidth_romaji_to_fullwidth_romaji(HALFWIDTH_ABC123),
            FULLWIDTH_ABC123
        );
    }

    #[test]
    fn test_halfwidth_katakana_to_fullwidth_katakana() {
        assert_eq!(
            transliterate_halfwidth_katakana_to_fullwidth_katakana("カタカナ"),
            "カタカナ"
        );
        // Voiced and semi-voiced marks are fused into the preceding kana.
        assert_eq!(
            transliterate_halfwidth_katakana_to_fullwidth_katakana("ガダガナピ"),
            "ガダガナピ"
        );
    }

    #[test]
    fn test_fullwidth_katakana_to_halfwidth_katakana() {
        assert_eq!(
            transliterate_fullwidth_katakana_to_halfwidth_katakana("カタカナ"),
            "カタカナ"
        );
        // Voiced and semi-voiced kana decompose into base + sound mark.
        assert_eq!(
            transliterate_fullwidth_katakana_to_halfwidth_katakana("ガダガナピ"),
            "ガダガナピ"
        );
    }
}
+143 -43
View File
@@ -1,30 +1,151 @@
mod kana_transliteration;
use sqlite3_ext::{
Connection, FromValue, Result, ValueRef, ValueType, function::Context, sqlite3_ext_fn,
sqlite3_ext_main,
};
fn add(a: i32, b: i32) -> i32 {
a + b
}
use crate::kana_transliteration::{
transliterate_fullwidth_katakana_to_halfwidth_katakana,
transliterate_fullwidth_romaji_to_halfwidth_romaji,
transliterate_halfwidth_katakana_to_fullwidth_katakana,
transliterate_halfwidth_romaji_to_fullwidth_romaji, transliterate_hiragana_to_katakana,
transliterate_katakana_to_hiragana,
};
#[sqlite3_ext_fn(n_args = 2, deterministic)]
fn add_sqlite(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
if args[0].value_type() != ValueType::Integer || args[1].value_type() != ValueType::Integer {
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn hiragana_to_katakana(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
if args[0].value_type() != ValueType::Text {
return Err(sqlite3_ext::Error::Module(
"myadd requires both arguments to be an integer".to_string(),
"hiragana_to_katakana requires a string argument".to_string(),
));
}
let a = args[0].get_i32();
let b = args[1].get_i32();
let result = add(a, b);
let arg = args[0].get_str()?;
let result = transliterate_hiragana_to_katakana(arg);
ctx.set_result(result)?;
Ok(())
}
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn katakana_to_hiragana(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
if args[0].value_type() != ValueType::Text {
return Err(sqlite3_ext::Error::Module(
"katakana_to_hiragana requires a string argument".to_string(),
));
}
let arg = args[0].get_str()?;
let result = transliterate_katakana_to_hiragana(arg);
ctx.set_result(result)?;
Ok(())
}
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn fullwidth_romaji_to_halfwidth_romaji(
ctx: &mut Context,
args: &mut [&mut ValueRef],
) -> Result<()> {
if args[0].value_type() != ValueType::Text {
return Err(sqlite3_ext::Error::Module(
"fullwidth_romaji_to_halfwidth_romaji requires a string argument".to_string(),
));
}
let arg = args[0].get_str()?;
let result = transliterate_fullwidth_romaji_to_halfwidth_romaji(arg);
ctx.set_result(result)?;
Ok(())
}
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn halfwidth_romaji_to_fullwidth_romaji(
ctx: &mut Context,
args: &mut [&mut ValueRef],
) -> Result<()> {
if args[0].value_type() != ValueType::Text {
return Err(sqlite3_ext::Error::Module(
"halfwidth_romaji_to_fullwidth_romaji requires a string argument".to_string(),
));
}
let arg = args[0].get_str()?;
let result = transliterate_halfwidth_romaji_to_fullwidth_romaji(arg);
ctx.set_result(result)?;
Ok(())
}
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn fullwidth_katakana_to_halfwidth_katakana(
ctx: &mut Context,
args: &mut [&mut ValueRef],
) -> Result<()> {
if args[0].value_type() != ValueType::Text {
return Err(sqlite3_ext::Error::Module(
"fullwidth_katakana_to_halfwidth_katakana requires a string argument".to_string(),
));
}
let arg = args[0].get_str()?;
let result = transliterate_fullwidth_katakana_to_halfwidth_katakana(arg);
ctx.set_result(result)?;
Ok(())
}
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn halfwidth_katakana_to_fullwidth_katakana(
ctx: &mut Context,
args: &mut [&mut ValueRef],
) -> Result<()> {
if args[0].value_type() != ValueType::Text {
return Err(sqlite3_ext::Error::Module(
"halfwidth_katakana_to_fullwidth_katakana requires a string argument".to_string(),
));
}
let arg = args[0].get_str()?;
let result = transliterate_halfwidth_katakana_to_fullwidth_katakana(arg);
ctx.set_result(result)?;
Ok(())
}
#[sqlite3_ext_main(persistent)]
fn init(db: &Connection) -> Result<()> {
db.create_scalar_function("myadd", &ADD_SQLITE_OPTS, add_sqlite)?;
db.create_scalar_function(
"hiragana_to_katakana",
&HIRAGANA_TO_KATAKANA_OPTS,
hiragana_to_katakana,
)?;
db.create_scalar_function(
"katakana_to_hiragana",
&KATAKANA_TO_HIRAGANA_OPTS,
katakana_to_hiragana,
)?;
db.create_scalar_function(
"fullwidth_romaji_to_halfwidth_romaji",
&FULLWIDTH_ROMAJI_TO_HALFWIDTH_ROMAJI_OPTS,
fullwidth_romaji_to_halfwidth_romaji,
)?;
db.create_scalar_function(
"halfwidth_romaji_to_fullwidth_romaji",
&HALFWIDTH_ROMAJI_TO_FULLWIDTH_ROMAJI_OPTS,
halfwidth_romaji_to_fullwidth_romaji,
)?;
db.create_scalar_function(
"fullwidth_katakana_to_halfwidth_katakana",
&FULLWIDTH_KATAKANA_TO_HALFWIDTH_KATAKANA_OPTS,
fullwidth_katakana_to_halfwidth_katakana,
)?;
db.create_scalar_function(
"halfwidth_katakana_to_fullwidth_katakana",
&HALFWIDTH_KATAKANA_TO_FULLWIDTH_KATAKANA_OPTS,
halfwidth_katakana_to_fullwidth_katakana,
)?;
Ok(())
}
@@ -41,44 +162,23 @@ mod test {
}
#[test]
fn test_working() -> Result<()> {
fn test_basic_sqlite_query() -> Result<()> {
let conn = setup()?;
let results: Vec<i64> = conn
.prepare("SELECT myadd(10, 20)")?
let results: Vec<_> = conn
.prepare(
"SELECT
hiragana_to_katakana('ひらがな'),
katakana_to_hiragana('カタカナ')
",
)?
.query(())?
.map(|row| Ok(row[0].get_i64()))
.map(|row| Ok((row[0].get_str()?.to_string(), row[1].get_str()?.to_string())))
.collect()?;
assert_eq!(results, vec![30]);
Ok(())
}
#[test]
fn test_wrong_arg_number() -> Result<()> {
let conn = setup()?;
let result = conn.prepare("SELECT myadd(10)");
assert_eq!(
result.unwrap_err(),
Error::Sqlite(
1,
Some("wrong number of arguments to function myadd()".to_string()),
)
results,
vec![("ヒラガナ".to_string(), "かたかな".to_string())]
);
Ok(())
}
#[test]
fn test_wrong_arg_type() -> Result<()> {
let conn = setup()?;
let mut statement = conn.prepare("SELECT myadd(10, 'hello!')")?;
let result = statement.query(())?.next();
assert_eq!(
result.unwrap_err(),
Error::Sqlite(
1,
Some("myadd requires both arguments to be an integer".to_string()),
)
);
Ok(())
}
}