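//! `tamerye/src/lib.rs`
//!
//! Registers Japanese text transliteration and normalization helpers as
//! SQLite scalar functions through the `sqlite3_ext` crate.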

mod kana_transliteration;
mod text_normalization;
mod util;
use sqlite3_ext::{
Connection, FromValue, Result, ValueRef, ValueType, function::Context, sqlite3_ext_fn,
sqlite3_ext_main,
};
use crate::kana_transliteration::{
transliterate_fullwidth_katakana_to_halfwidth_katakana,
transliterate_fullwidth_romaji_to_halfwidth_romaji,
transliterate_halfwidth_katakana_to_fullwidth_katakana,
transliterate_halfwidth_romaji_to_fullwidth_romaji, transliterate_hiragana_to_katakana,
transliterate_katakana_to_hiragana,
};
use crate::text_normalization::normalize_japanese_text;
/// SQL scalar function `hiragana_to_katakana(text)`.
///
/// Passes the single TEXT argument through
/// `transliterate_hiragana_to_katakana` and sets the outcome as the SQL result.
///
/// # Errors
///
/// Returns `Error::Module` when the argument's value type is anything other
/// than TEXT, and propagates failures from reading the argument or from
/// setting the result.
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
fn hiragana_to_katakana(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
    // Only TEXT input is accepted; every other value type is rejected up front.
    match args[0].value_type() {
        ValueType::Text => {}
        _ => {
            return Err(sqlite3_ext::Error::Module(
                "hiragana_to_katakana requires a string argument".to_string(),
            ));
        }
    }
    let text = args[0].get_str()?;
    ctx.set_result(transliterate_hiragana_to_katakana(text))?;
    Ok(())
}
/// SQL scalar function `katakana_to_hiragana(text)`.
///
/// Passes the single TEXT argument through
/// `transliterate_katakana_to_hiragana` and sets the outcome as the SQL result.
///
/// # Errors
///
/// Returns `Error::Module` when the argument's value type is anything other
/// than TEXT, and propagates failures from reading the argument or from
/// setting the result.
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
fn katakana_to_hiragana(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
    // Only TEXT input is accepted; every other value type is rejected up front.
    match args[0].value_type() {
        ValueType::Text => {}
        _ => {
            return Err(sqlite3_ext::Error::Module(
                "katakana_to_hiragana requires a string argument".to_string(),
            ));
        }
    }
    let text = args[0].get_str()?;
    ctx.set_result(transliterate_katakana_to_hiragana(text))?;
    Ok(())
}
/// SQL scalar function `fullwidth_romaji_to_halfwidth_romaji(text)`.
///
/// Passes the single TEXT argument through
/// `transliterate_fullwidth_romaji_to_halfwidth_romaji` and sets the outcome
/// as the SQL result.
///
/// # Errors
///
/// Returns `Error::Module` when the argument's value type is anything other
/// than TEXT, and propagates failures from reading the argument or from
/// setting the result.
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
fn fullwidth_romaji_to_halfwidth_romaji(
    ctx: &mut Context,
    args: &mut [&mut ValueRef],
) -> Result<()> {
    // Only TEXT input is accepted; every other value type is rejected up front.
    match args[0].value_type() {
        ValueType::Text => {}
        _ => {
            return Err(sqlite3_ext::Error::Module(
                "fullwidth_romaji_to_halfwidth_romaji requires a string argument".to_string(),
            ));
        }
    }
    let text = args[0].get_str()?;
    ctx.set_result(transliterate_fullwidth_romaji_to_halfwidth_romaji(text))?;
    Ok(())
}
/// SQL scalar function `halfwidth_romaji_to_fullwidth_romaji(text)`.
///
/// Passes the single TEXT argument through
/// `transliterate_halfwidth_romaji_to_fullwidth_romaji` and sets the outcome
/// as the SQL result.
///
/// # Errors
///
/// Returns `Error::Module` when the argument's value type is anything other
/// than TEXT, and propagates failures from reading the argument or from
/// setting the result.
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
fn halfwidth_romaji_to_fullwidth_romaji(
    ctx: &mut Context,
    args: &mut [&mut ValueRef],
) -> Result<()> {
    // Only TEXT input is accepted; every other value type is rejected up front.
    match args[0].value_type() {
        ValueType::Text => {}
        _ => {
            return Err(sqlite3_ext::Error::Module(
                "halfwidth_romaji_to_fullwidth_romaji requires a string argument".to_string(),
            ));
        }
    }
    let text = args[0].get_str()?;
    ctx.set_result(transliterate_halfwidth_romaji_to_fullwidth_romaji(text))?;
    Ok(())
}
/// SQL scalar function `fullwidth_katakana_to_halfwidth_katakana(text)`.
///
/// Passes the single TEXT argument through
/// `transliterate_fullwidth_katakana_to_halfwidth_katakana` and sets the
/// outcome as the SQL result.
///
/// # Errors
///
/// Returns `Error::Module` when the argument's value type is anything other
/// than TEXT, and propagates failures from reading the argument or from
/// setting the result.
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
fn fullwidth_katakana_to_halfwidth_katakana(
    ctx: &mut Context,
    args: &mut [&mut ValueRef],
) -> Result<()> {
    // Only TEXT input is accepted; every other value type is rejected up front.
    match args[0].value_type() {
        ValueType::Text => {}
        _ => {
            return Err(sqlite3_ext::Error::Module(
                "fullwidth_katakana_to_halfwidth_katakana requires a string argument".to_string(),
            ));
        }
    }
    let text = args[0].get_str()?;
    ctx.set_result(transliterate_fullwidth_katakana_to_halfwidth_katakana(text))?;
    Ok(())
}
/// SQL scalar function `halfwidth_katakana_to_fullwidth_katakana(text)`.
///
/// Passes the single TEXT argument through
/// `transliterate_halfwidth_katakana_to_fullwidth_katakana` and sets the
/// outcome as the SQL result.
///
/// # Errors
///
/// Returns `Error::Module` when the argument's value type is anything other
/// than TEXT, and propagates failures from reading the argument or from
/// setting the result.
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
fn halfwidth_katakana_to_fullwidth_katakana(
    ctx: &mut Context,
    args: &mut [&mut ValueRef],
) -> Result<()> {
    // Only TEXT input is accepted; every other value type is rejected up front.
    match args[0].value_type() {
        ValueType::Text => {}
        _ => {
            return Err(sqlite3_ext::Error::Module(
                "halfwidth_katakana_to_fullwidth_katakana requires a string argument".to_string(),
            ));
        }
    }
    let text = args[0].get_str()?;
    ctx.set_result(transliterate_halfwidth_katakana_to_fullwidth_katakana(text))?;
    Ok(())
}
/// SQL scalar function `normalize_jp(text)`.
///
/// Passes the single TEXT argument through `normalize_japanese_text` and sets
/// the outcome as the SQL result.
///
/// # Errors
///
/// Returns `Error::Module` when the argument's value type is anything other
/// than TEXT, and propagates failures from reading the argument or from
/// setting the result.
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
fn normalize_jp(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
    // Only TEXT input is accepted; every other value type is rejected up front.
    match args[0].value_type() {
        ValueType::Text => {}
        _ => {
            return Err(sqlite3_ext::Error::Module(
                "normalize_jp requires a string argument".to_string(),
            ));
        }
    }
    let text = args[0].get_str()?;
    ctx.set_result(normalize_japanese_text(text))?;
    Ok(())
}
/// Registers every scalar function defined in this file on `db`.
///
/// Functions are registered in a fixed order; the first registration that
/// fails aborts the sequence and its error is returned to the caller.
///
/// NOTE(review): the `*_OPTS` constants are not defined in this file's visible
/// source — presumably they are generated by `#[sqlite3_ext_fn]`; confirm.
#[sqlite3_ext_main(persistent)]
fn init(db: &Connection) -> Result<()> {
    // Local shorthand for one registration: SQL name, options constant, Rust fn.
    macro_rules! register {
        ($sql_name:literal, $opts:ident, $func:ident) => {
            db.create_scalar_function($sql_name, &$opts, $func)?
        };
    }

    register!(
        "hiragana_to_katakana",
        HIRAGANA_TO_KATAKANA_OPTS,
        hiragana_to_katakana
    );
    register!(
        "katakana_to_hiragana",
        KATAKANA_TO_HIRAGANA_OPTS,
        katakana_to_hiragana
    );
    register!(
        "fullwidth_romaji_to_halfwidth_romaji",
        FULLWIDTH_ROMAJI_TO_HALFWIDTH_ROMAJI_OPTS,
        fullwidth_romaji_to_halfwidth_romaji
    );
    register!(
        "halfwidth_romaji_to_fullwidth_romaji",
        HALFWIDTH_ROMAJI_TO_FULLWIDTH_ROMAJI_OPTS,
        halfwidth_romaji_to_fullwidth_romaji
    );
    register!(
        "fullwidth_katakana_to_halfwidth_katakana",
        FULLWIDTH_KATAKANA_TO_HALFWIDTH_KATAKANA_OPTS,
        fullwidth_katakana_to_halfwidth_katakana
    );
    register!(
        "halfwidth_katakana_to_fullwidth_katakana",
        HALFWIDTH_KATAKANA_TO_FULLWIDTH_KATAKANA_OPTS,
        halfwidth_katakana_to_fullwidth_katakana
    );
    register!("normalize_jp", NORMALIZE_JP_OPTS, normalize_jp);

    Ok(())
}
/// Integration tests that run the registered SQL functions against an
/// in-memory database. Only compiled for test builds with the `static`
/// feature enabled.
#[cfg(all(test, feature = "static"))]
mod test {
    use super::*;
    use sqlite3_ext::{Database, Error, FallibleIterator, FallibleIteratorMut};

    /// Opens an in-memory database and registers all scalar functions on it.
    fn setup() -> Result<Database> {
        let conn = Database::open(":memory:")?;
        init(&conn)?;
        Ok(conn)
    }

    /// Both kana conversion functions are callable from SQL and convert in
    /// the expected direction.
    #[test]
    fn test_basic_sqlite_query() -> Result<()> {
        let conn = setup()?;
        let results: Vec<_> = conn
            .prepare(
                "SELECT
                    hiragana_to_katakana('ひらがな'),
                    katakana_to_hiragana('カタカナ')
                ",
            )?
            .query(())?
            .map(|row| Ok((row[0].get_str()?.to_string(), row[1].get_str()?.to_string())))
            .collect()?;
        assert_eq!(
            results,
            vec![("ヒラガナ".to_string(), "かたかな".to_string())]
        );
        Ok(())
    }

    /// `normalize_jp` can back an expression index and be used in a `WHERE`
    /// clause to find rows by their normalized form.
    #[test]
    fn test_text_normalization() -> Result<()> {
        let conn = setup()?;
        conn.execute("CREATE TABLE test(value TEXT)", Vec::<&str>::new())?;
        conn.execute(
            "CREATE INDEX idx_normalized_jp ON test(normalize_jp(value))",
            Vec::<&str>::new(),
        )?;
        conn.execute(
            "INSERT INTO test(value) VALUES ('あ'), ('ア'), ('い'), ('イ')",
            Vec::<&str>::new(),
        )?;
        let results: Vec<_> = conn
            .prepare("SELECT value FROM test WHERE normalize_jp(value) = 'あ'")?
            .query(())?
            .map(|row| Ok(row[0].get_str()?.to_string()))
            .collect()?;
        // The query returns the stored `value` column, so the rows can only be
        // strings that were inserted above — never empty strings.
        // NOTE(review): expects 'ア' to normalize to 'あ', i.e. normalize_jp
        // folds katakana onto hiragana — confirm against text_normalization.
        assert_eq!(results, vec!["あ".to_string(), "ア".to_string()]);
        Ok(())
    }
}