229 lines
6.9 KiB
Rust
229 lines
6.9 KiB
Rust
mod kana_transliteration;
|
|
mod text_normalization;
|
|
mod util;
|
|
|
|
use sqlite3_ext::{
|
|
Connection, FromValue, Result, ValueRef, ValueType, function::Context, sqlite3_ext_fn,
|
|
sqlite3_ext_main,
|
|
};
|
|
|
|
use crate::kana_transliteration::{
|
|
transliterate_fullwidth_katakana_to_halfwidth_katakana,
|
|
transliterate_fullwidth_romaji_to_halfwidth_romaji,
|
|
transliterate_halfwidth_katakana_to_fullwidth_katakana,
|
|
transliterate_halfwidth_romaji_to_fullwidth_romaji, transliterate_hiragana_to_katakana,
|
|
transliterate_katakana_to_hiragana,
|
|
};
|
|
|
|
use crate::text_normalization::normalize_japanese_text;
|
|
|
|
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
|
|
fn hiragana_to_katakana(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
|
|
if args[0].value_type() != ValueType::Text {
|
|
return Err(sqlite3_ext::Error::Module(
|
|
"hiragana_to_katakana requires a string argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let arg = args[0].get_str()?;
|
|
let result = transliterate_hiragana_to_katakana(arg);
|
|
ctx.set_result(result)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
|
|
fn katakana_to_hiragana(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
|
|
if args[0].value_type() != ValueType::Text {
|
|
return Err(sqlite3_ext::Error::Module(
|
|
"katakana_to_hiragana requires a string argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let arg = args[0].get_str()?;
|
|
let result = transliterate_katakana_to_hiragana(arg);
|
|
ctx.set_result(result)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
|
|
fn fullwidth_romaji_to_halfwidth_romaji(
|
|
ctx: &mut Context,
|
|
args: &mut [&mut ValueRef],
|
|
) -> Result<()> {
|
|
if args[0].value_type() != ValueType::Text {
|
|
return Err(sqlite3_ext::Error::Module(
|
|
"fullwidth_romaji_to_halfwidth_romaji requires a string argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let arg = args[0].get_str()?;
|
|
let result = transliterate_fullwidth_romaji_to_halfwidth_romaji(arg);
|
|
ctx.set_result(result)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
|
|
fn halfwidth_romaji_to_fullwidth_romaji(
|
|
ctx: &mut Context,
|
|
args: &mut [&mut ValueRef],
|
|
) -> Result<()> {
|
|
if args[0].value_type() != ValueType::Text {
|
|
return Err(sqlite3_ext::Error::Module(
|
|
"halfwidth_romaji_to_fullwidth_romaji requires a string argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let arg = args[0].get_str()?;
|
|
let result = transliterate_halfwidth_romaji_to_fullwidth_romaji(arg);
|
|
ctx.set_result(result)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
|
|
fn fullwidth_katakana_to_halfwidth_katakana(
|
|
ctx: &mut Context,
|
|
args: &mut [&mut ValueRef],
|
|
) -> Result<()> {
|
|
if args[0].value_type() != ValueType::Text {
|
|
return Err(sqlite3_ext::Error::Module(
|
|
"fullwidth_katakana_to_halfwidth_katakana requires a string argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let arg = args[0].get_str()?;
|
|
let result = transliterate_fullwidth_katakana_to_halfwidth_katakana(arg);
|
|
ctx.set_result(result)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
|
|
fn halfwidth_katakana_to_fullwidth_katakana(
|
|
ctx: &mut Context,
|
|
args: &mut [&mut ValueRef],
|
|
) -> Result<()> {
|
|
if args[0].value_type() != ValueType::Text {
|
|
return Err(sqlite3_ext::Error::Module(
|
|
"halfwidth_katakana_to_fullwidth_katakana requires a string argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let arg = args[0].get_str()?;
|
|
let result = transliterate_halfwidth_katakana_to_fullwidth_katakana(arg);
|
|
ctx.set_result(result)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[sqlite3_ext_fn(n_args = 1, deterministic, risk_level=Innocuous)]
|
|
fn normalize_jp(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
|
|
if args[0].value_type() != ValueType::Text {
|
|
return Err(sqlite3_ext::Error::Module(
|
|
"normalize_jp requires a string argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let arg = args[0].get_str()?;
|
|
let result = normalize_japanese_text(arg);
|
|
ctx.set_result(result)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[sqlite3_ext_main(persistent)]
|
|
fn init(db: &Connection) -> Result<()> {
|
|
db.create_scalar_function(
|
|
"hiragana_to_katakana",
|
|
&HIRAGANA_TO_KATAKANA_OPTS,
|
|
hiragana_to_katakana,
|
|
)?;
|
|
|
|
db.create_scalar_function(
|
|
"katakana_to_hiragana",
|
|
&KATAKANA_TO_HIRAGANA_OPTS,
|
|
katakana_to_hiragana,
|
|
)?;
|
|
|
|
db.create_scalar_function(
|
|
"fullwidth_romaji_to_halfwidth_romaji",
|
|
&FULLWIDTH_ROMAJI_TO_HALFWIDTH_ROMAJI_OPTS,
|
|
fullwidth_romaji_to_halfwidth_romaji,
|
|
)?;
|
|
|
|
db.create_scalar_function(
|
|
"halfwidth_romaji_to_fullwidth_romaji",
|
|
&HALFWIDTH_ROMAJI_TO_FULLWIDTH_ROMAJI_OPTS,
|
|
halfwidth_romaji_to_fullwidth_romaji,
|
|
)?;
|
|
|
|
db.create_scalar_function(
|
|
"fullwidth_katakana_to_halfwidth_katakana",
|
|
&FULLWIDTH_KATAKANA_TO_HALFWIDTH_KATAKANA_OPTS,
|
|
fullwidth_katakana_to_halfwidth_katakana,
|
|
)?;
|
|
|
|
db.create_scalar_function(
|
|
"halfwidth_katakana_to_fullwidth_katakana",
|
|
&HALFWIDTH_KATAKANA_TO_FULLWIDTH_KATAKANA_OPTS,
|
|
halfwidth_katakana_to_fullwidth_katakana,
|
|
)?;
|
|
|
|
db.create_scalar_function("normalize_jp", &NORMALIZE_JP_OPTS, normalize_jp)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(all(test, feature = "static"))]
mod test {
    use super::*;
    use sqlite3_ext::{Database, Error, FallibleIterator, FallibleIteratorMut};

    /// Opens an in-memory database and registers this extension's functions on it.
    fn setup() -> Result<Database> {
        let db = Database::open(":memory:")?;
        init(&db)?;
        Ok(db)
    }

    /// Calls two of the registered functions from plain SQL and checks the
    /// strings they produce.
    #[test]
    fn test_basic_sqlite_query() -> Result<()> {
        let db = setup()?;
        let sql = "SELECT
                hiragana_to_katakana('ひらがな'),
                katakana_to_hiragana('カタカナ')
            ";

        let rows: Vec<_> = db
            .prepare(sql)?
            .query(())?
            .map(|row| Ok((row[0].get_str()?.to_string(), row[1].get_str()?.to_string())))
            .collect()?;

        let expected = vec![("ヒラガナ".to_string(), "かたかな".to_string())];
        assert_eq!(rows, expected);

        Ok(())
    }

    /// Uses `normalize_jp` both in an expression index and in a `WHERE` clause,
    /// then checks which stored values match the normalized form 'あ'.
    #[test]
    fn test_text_normalization() -> Result<()> {
        let db = setup()?;

        // Schema, expression index, then fixture rows, in that order.
        let setup_statements = [
            "CREATE TABLE test(value TEXT)",
            "CREATE INDEX idx_normalized_jp ON test(normalize_jp(value))",
            "INSERT INTO test(value) VALUES ('あ'), ('ア'), ('い'), ('イ')",
        ];
        for statement in setup_statements {
            db.execute(statement, Vec::<&str>::new())?;
        }

        let matches: Vec<_> = db
            .prepare("SELECT value FROM test WHERE normalize_jp(value) = 'あ'")?
            .query(())?
            .map(|row| Ok(row[0].get_str()?.to_string()))
            .collect()?;

        let expected = vec!["あ".to_string(), "ア".to_string()];
        assert_eq!(matches, expected);

        Ok(())
    }
}
|