Add text normalization function
Build and test / check (push) Successful in 44s
Build and test / build-dynamic-library (push) Successful in 48s
Build and test / build-static-library (push) Successful in 47s
Build and test / test (push) Successful in 1m5s

This commit is contained in:
2026-05-07 23:24:13 +09:00
parent cb1556c7d9
commit 2fc07184e9
2 changed files with 51 additions and 0 deletions
+43
View File
@@ -1,4 +1,5 @@
mod kana_transliteration;
mod text_normalization;
use sqlite3_ext::{
Connection, FromValue, Result, ValueRef, ValueType, function::Context, sqlite3_ext_fn,
@@ -13,6 +14,8 @@ use crate::kana_transliteration::{
transliterate_katakana_to_hiragana,
};
use crate::text_normalization::normalize_japanese_text;
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn hiragana_to_katakana(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
if args[0].value_type() != ValueType::Text {
@@ -109,6 +112,20 @@ fn halfwidth_katakana_to_fullwidth_katakana(
Ok(())
}
#[sqlite3_ext_fn(n_args = 1, deterministic)]
fn normalize_jp(ctx: &mut Context, args: &mut [&mut ValueRef]) -> Result<()> {
if args[0].value_type() != ValueType::Text {
return Err(sqlite3_ext::Error::Module(
"normalize_jp requires a string argument".to_string(),
));
}
let arg = args[0].get_str()?;
let result = normalize_japanese_text(arg);
ctx.set_result(result)?;
Ok(())
}
#[sqlite3_ext_main(persistent)]
fn init(db: &Connection) -> Result<()> {
db.create_scalar_function(
@@ -147,6 +164,8 @@ fn init(db: &Connection) -> Result<()> {
halfwidth_katakana_to_fullwidth_katakana,
)?;
db.create_scalar_function("normalize_jp", &NORMALIZE_JP_OPTS, normalize_jp)?;
Ok(())
}
@@ -181,4 +200,28 @@ mod test {
Ok(())
}
/// Checks that `normalize_jp` is usable both in an expression index and in a `WHERE` clause.
///
/// Four rows are inserted ('あ', 'ア', 'い', 'イ'); filtering on `normalize_jp(value) = 'あ'` must
/// return exactly the hiragana and katakana spellings of "a".
#[test]
fn test_text_normalization() -> Result<()> {
    let conn = setup()?;

    conn.execute("CREATE TABLE test(value TEXT)", Vec::<&str>::new())?;
    // Creating an index on the expression exercises the function's `deterministic` flag.
    conn.execute(
        "CREATE INDEX idx_normalized_jp ON test(normalize_jp(value))",
        Vec::<&str>::new(),
    )?;
    conn.execute(
        "INSERT INTO test(value) VALUES ('あ'), ('ア'), ('い'), ('イ')",
        Vec::<&str>::new(),
    )?;

    let results: Vec<_> = conn
        .prepare("SELECT value FROM test WHERE normalize_jp(value) = 'あ'")?
        .query(())?
        .map(|row| Ok(row[0].get_str()?.to_string()))
        .collect()?;

    // The stored values are returned unmodified, so the expected rows are the original
    // spellings whose normalized form is 'あ' (never empty strings).
    assert_eq!(results, vec!["あ".to_string(), "ア".to_string()]);

    Ok(())
}
}
+8
View File
@@ -0,0 +1,8 @@
use crate::kana_transliteration::{
transliterate_halfwidth_katakana_to_fullwidth_katakana, transliterate_katakana_to_hiragana,
};
/// Normalizes `text` by running two transliteration passes in sequence.
///
/// The input first goes through `transliterate_halfwidth_katakana_to_fullwidth_katakana`, and the
/// output of that pass is then fed to `transliterate_katakana_to_hiragana`, whose result is
/// returned.
pub fn normalize_japanese_text(text: &str) -> String {
    transliterate_katakana_to_hiragana(&transliterate_halfwidth_katakana_to_fullwidth_katakana(
        text,
    ))
}