Files
empidee/src/response_tokenizer.rs
h7x4 23563cfb9b response_tokenizer: rewrite
This commit contains a rewrite of the response tokenizer, which
introduces lazy parsing of the response, handling of binary data, some
tests, as well as just generally more robustness against errors.
2026-01-09 13:31:18 +09:00

561 lines
18 KiB
Rust

/// Result alias pairing a [`GenericResponse`] map with a borrowed string as the error value.
pub type GenericResponseResult<'a> = Result<GenericResponse<'a>, &'a str>;
/// Map from a borrowed attribute key to its borrowed [`GenericResponseValue`].
pub type GenericResponse<'a> = HashMap<&'a str, GenericResponseValue<'a>>;
/// A single attribute value borrowed from the raw response buffer.
///
/// The tokenizer produces `Binary` for the `binary` key and `Text` for every
/// other key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenericResponseValue<'a> {
/// A UTF-8 text value (everything after the `:` on an attribute line, with
/// leading ASCII whitespace removed).
Text(&'a str),
/// A raw byte payload whose length was announced by a `binary: <count>` line.
Binary(&'a [u8]),
// Many(Vec<GenericResponseValue<'a>>),
}
/// Cursor over a raw response buffer that is tokenized lazily through its
/// `Iterator` implementation.
#[derive(Debug, Clone)]
pub struct ResponseAttributes<'a> {
// The complete raw response; every yielded key and value borrows from it.
bytestring: &'a [u8],
// Byte offset into `bytestring` of the next line to tokenize.
cursor: usize,
}
impl<'a> ResponseAttributes<'a> {
    /// Creates a tokenizer over a textual response.
    pub fn new(raw: &'a str) -> Self {
        Self {
            bytestring: raw.as_bytes(),
            cursor: 0,
        }
    }

    /// Creates a tokenizer over a raw byte response (which may embed binary payloads).
    pub fn new_from_bytes(bytes: &'a [u8]) -> Self {
        Self {
            cursor: 0,
            bytestring: bytes,
        }
    }

    /// Returns `true` when nothing is left to tokenize: either the cursor is at
    /// or past the end of the buffer, or the remaining bytes start with `OK`.
    pub fn is_empty(&self) -> bool {
        match self.bytestring.get(self.cursor..) {
            Some(rest) if !rest.is_empty() => rest.starts_with(b"OK"),
            _ => true,
        }
    }

    /// Consumes the tokenizer and collects all attributes into a map.
    ///
    /// # Errors
    ///
    /// Propagates the first tokenizer error, or the error produced by the map
    /// conversion when a key occurs more than once.
    pub fn into_map(
        self,
    ) -> Result<HashMap<&'a str, GenericResponseValue<'a>>, ResponseParserError<'a>> {
        From::from(self)
    }

    /// Consumes the tokenizer and collects all attributes, in order, into a vector.
    ///
    /// # Errors
    ///
    /// Propagates the first tokenizer error.
    pub fn into_vec(
        self,
    ) -> Result<Vec<(&'a str, GenericResponseValue<'a>)>, ResponseParserError<'a>> {
        From::from(self)
    }

    /// Consumes the tokenizer and collects every yielded item, successes and
    /// errors alike, into a vector.
    pub fn into_lazy_vec(
        self,
    ) -> Vec<Result<(&'a str, GenericResponseValue<'a>), ResponseParserError<'a>>> {
        From::from(self)
    }

    /// Checks that every attribute in the response uses `expected_key`.
    ///
    /// The scan runs on a copy that is rewound to the start of the buffer, so
    /// the position of `self` is left untouched.
    ///
    /// # Errors
    ///
    /// Returns `UnexpectedProperty` carrying the first differing key, or the
    /// first tokenizer error encountered during the scan.
    pub fn verify_all_keys_equal(&self, expected_key: &str) -> Result<(), ResponseParserError<'a>> {
        let mut rewound = Self {
            bytestring: self.bytestring,
            cursor: 0,
        };
        rewound.try_for_each(|entry| {
            let (key, _) = entry?;
            if key == expected_key {
                Ok(())
            } else {
                Err(ResponseParserError::UnexpectedProperty(key))
            }
        })
    }
}
/// Lazily tokenizes the response, yielding one `(key, value)` pair per line.
///
/// Iteration ends with `None` when the remaining input starts with `OK`.
/// If the input runs out before that, a single `UnexpectedEOF` error is
/// yielded and the iterator then terminates, so collecting the items of an
/// unterminated response always finishes.
impl<'a> Iterator for ResponseAttributes<'a> {
    type Item = Result<(&'a str, GenericResponseValue<'a>), ResponseParserError<'a>>;

    fn next(&mut self) -> Option<Self::Item> {
        // Cursor value marking "EOF error already reported". A real offset can
        // never reach it because slices are at most `isize::MAX` bytes long.
        const EXHAUSTED: usize = usize::MAX;

        if self.cursor == EXHAUSTED {
            return None;
        }

        // Copy the shared reference out so yielded slices carry the full `'a`
        // lifetime instead of being tied to the `&mut self` borrow.
        let bytes: &'a [u8] = self.bytestring;

        // Find the next non-empty line. Blank lines are skipped in a loop rather
        // than by recursion so hostile input cannot grow the stack.
        let line = loop {
            let remaining = match bytes.get(self.cursor..) {
                Some(rest) if !rest.is_empty() => rest,
                _ => {
                    self.cursor = EXHAUSTED;
                    return Some(Err(ResponseParserError::UnexpectedEOF));
                }
            };

            if remaining.starts_with(b"OK") {
                return None;
            }

            let newline_pos = remaining
                .iter()
                .position(|&b| b == b'\n')
                .unwrap_or(remaining.len());

            // NOTE: the cursor must advance before any early return below, so
            // that the iterator makes progress despite errors.
            self.cursor += newline_pos + 1;

            if newline_pos > 0 {
                break &remaining[..newline_pos];
            }
        };

        // Split on the first ':' only; a line without ':' is a key with no value.
        let (key_bytes, raw_value) = match line.iter().position(|&b| b == b':') {
            Some(colon) => (&line[..colon], Some(&line[colon + 1..])),
            None => (line, None),
        };

        // TODO: should an empty key be a proper runtime error?
        debug_assert!(!key_bytes.is_empty());

        // A malformed key is reported as an error; returning `None` here would
        // look like a clean end of response and silently drop the remainder.
        let key = match std::str::from_utf8(key_bytes) {
            Ok(key) => key,
            Err(_) => {
                return Some(Err(ResponseParserError::SyntaxError(
                    0,
                    "Property key is not valid UTF-8",
                )));
            }
        };

        // In the case of binary data, the value is the byte count in decimal,
        // and the actual binary data follows in the next N bytes, followed by
        // a newline. The payload is yielded under the "binary" key.
        if key == "binary" {
            let Some(count_bytes) = raw_value else {
                // TODO: throw more specific error
                return Some(Err(ResponseParserError::UnexpectedEOF));
            };

            let parsed_count = std::str::from_utf8(count_bytes.trim_ascii_start())
                .ok()
                .and_then(|digits| digits.parse::<usize>().ok());
            let Some(byte_count) = parsed_count else {
                // TODO: throw more specific error
                return Some(Err(ResponseParserError::SyntaxError(
                    0,
                    "Invalid byte count",
                )));
            };

            // `checked_add` + `get` reject both an overflowing length and a
            // window that extends past the end of the buffer.
            let value_start = self.cursor;
            let window = value_start
                .checked_add(byte_count)
                .and_then(|value_end| Some((value_end, bytes.get(value_start..value_end)?)));
            let Some((value_end, value_bytes)) = window else {
                return Some(Err(ResponseParserError::UnexpectedEOF));
            };

            debug_assert!(matches!(bytes.get(value_end), None | Some(b'\n')));

            // Skip the binary data and the following newline
            self.cursor = value_end + 1;

            Some(Ok((key, GenericResponseValue::Binary(value_bytes))))
        } else {
            let value_bytes = raw_value.unwrap_or_default().trim_ascii_start();

            // The specification declares that all string values are UTF-8, so
            // anything else is a syntax error rather than a silent stop.
            match std::str::from_utf8(value_bytes) {
                Ok(value) => Some(Ok((key, GenericResponseValue::Text(value)))),
                Err(_) => Some(Err(ResponseParserError::SyntaxError(
                    0,
                    "Property value is not valid UTF-8",
                ))),
            }
        }
    }
}
/// Collects every attribute into a map keyed by attribute name.
///
/// Stops at the first tokenizer error, and rejects responses in which a key
/// occurs more than once with `DuplicateProperty`.
impl<'a> From<ResponseAttributes<'a>>
    for Result<HashMap<&'a str, GenericResponseValue<'a>>, ResponseParserError<'a>>
{
    fn from(val: ResponseAttributes<'a>) -> Self {
        let mut attributes = HashMap::new();
        for entry in val {
            let (key, value) = entry?;
            // The entry API checks for a duplicate and inserts with one lookup.
            match attributes.entry(key) {
                std::collections::hash_map::Entry::Occupied(_) => {
                    return Err(ResponseParserError::DuplicateProperty(key));
                }
                std::collections::hash_map::Entry::Vacant(slot) => {
                    slot.insert(value);
                }
            }
        }
        Ok(attributes)
    }
}
/// Drains the tokenizer into a vector that keeps every yielded item,
/// successes and errors alike, in the order they were produced.
impl<'a> From<ResponseAttributes<'a>>
for Vec<Result<(&'a str, GenericResponseValue<'a>), ResponseParserError<'a>>>
{
fn from(val: ResponseAttributes<'a>) -> Self {
val.collect()
}
}
/// Drains the tokenizer into an ordered vector of `(key, value)` pairs.
///
/// Collecting into a `Result` stops at the first error the tokenizer yields
/// and returns that error instead of the vector.
impl<'a> From<ResponseAttributes<'a>>
for Result<Vec<(&'a str, GenericResponseValue<'a>)>, ResponseParserError<'a>>
{
fn from(val: ResponseAttributes<'a>) -> Self {
val.collect()
}
}
// TODO: There should probably be a helper that lets you extract and verify one, two or maybe
// three properties without having to allocate a hashmap to get a nice API. We can retrieve
// the properties by name with a loop on the inner vec.
/*******************/
/* Parsing Helpers */
/*******************/
/// Narrows an `Option<GenericResponseValue>` to the payload of `$variant`.
///
/// Expands to `Some(payload)` when the value has the requested variant and to
/// `None` when the property is absent. Any other variant makes the *enclosing
/// function* return `Err(UnexpectedPropertyType($name, <actual variant name>))`.
macro_rules! _expect_property_type {
    ($property:expr, $name:expr, $variant:ident) => {
        match $property {
            None => None,
            Some(crate::response_tokenizer::GenericResponseValue::$variant(inner)) => Some(inner),
            Some(mismatched) => {
                // Name the variant that was actually found for the error report.
                let found = match mismatched {
                    crate::response_tokenizer::GenericResponseValue::Binary(_) => "Binary",
                    crate::response_tokenizer::GenericResponseValue::Text(_) => "Text",
                };
                return Err(
                    crate::commands::ResponseParserError::UnexpectedPropertyType($name, found),
                );
            }
        }
    };
}
/// Parses an optional textual property with `parse()`.
///
/// Expands to `None` when the property is absent and to `Some(parsed)` on
/// success. A failed parse is propagated with `?` from the *enclosing
/// function* as `InvalidProperty($name, <raw text>)`.
macro_rules! _parse_optional_property_type {
    ($name:expr, $property:expr) => {
        match $property {
            Some(raw) => Some(raw.parse().map_err(|_| {
                crate::commands::ResponseParserError::InvalidProperty($name, raw)
            })?),
            None => None,
        }
    };
}
/// Unwraps a required property.
///
/// Expands to the inner value, or makes the *enclosing function* return
/// `Err(MissingProperty($name))` when the property is `None`.
macro_rules! _unwrap_optional_property_type {
    ($name:expr, $property:expr) => {
        if let Some(present) = $property {
            present
        } else {
            return Err(crate::commands::ResponseParserError::MissingProperty($name));
        }
    };
}
// Type-checks an optional property by forwarding all three arguments,
// unchanged, to `_expect_property_type!`.
macro_rules! expect_optional_property_type {
($property:expr, $name:expr, $variant:ident) => {
crate::response_tokenizer::_expect_property_type!($property, $name, $variant)
};
}
/// Type-checks a required property.
///
/// Combines `_expect_property_type!` (variant check) with
/// `_unwrap_optional_property_type!` (presence check); either failure returns
/// the corresponding error from the *enclosing function*.
macro_rules! expect_property_type {
    ($property:expr, $name:expr, $variant:ident) => {{
        let checked =
            crate::response_tokenizer::_expect_property_type!($property, $name, $variant);
        crate::response_tokenizer::_unwrap_optional_property_type!($name, checked)
    }};
}
/// Looks up `$name` in `$parts` via `.get()` and type-checks it as `$variant`.
///
/// Expands to `None` when the key is absent; a value of another variant is
/// reported by `_expect_property_type!`.
macro_rules! get_optional_property {
    ($parts:expr, $name:literal, $variant:ident) => {
        crate::response_tokenizer::_expect_property_type!(
            $parts.get($name).copied(),
            $name,
            $variant
        )
    };
}
/// Looks up the required key `$name` in `$parts` and type-checks it as `$variant`.
///
/// A missing key or a value of another variant returns the corresponding
/// error from the *enclosing function*.
macro_rules! get_property {
    ($parts:expr, $name:literal, $variant:ident) => {{
        let found = crate::response_tokenizer::_expect_property_type!(
            $parts.get($name).copied(),
            $name,
            $variant
        );
        crate::response_tokenizer::_unwrap_optional_property_type!($name, found)
    }};
}
/// Looks up the optional key `$name`, type-checks it as `$variant`, and parses it.
///
/// Expands to `None` when the key is absent; a wrong variant or a failed
/// parse returns the corresponding error from the *enclosing function*.
macro_rules! get_and_parse_optional_property {
    ($parts:ident, $name:literal, $variant:ident) => {{
        let found = crate::response_tokenizer::_expect_property_type!(
            $parts.get($name).copied(),
            $name,
            $variant
        );
        crate::response_tokenizer::_parse_optional_property_type!($name, found)
    }};
}
/// Looks up the required key `$name`, type-checks it as `$variant`, and parses it.
///
/// A missing key, a wrong variant, or a failed parse returns the
/// corresponding error from the *enclosing function*.
macro_rules! get_and_parse_property {
    ($parts:ident, $name:literal, $variant:ident) => {{
        let found = crate::response_tokenizer::_expect_property_type!(
            $parts.get($name).copied(),
            $name,
            $variant
        );
        let parsed = crate::response_tokenizer::_parse_optional_property_type!($name, found);
        crate::response_tokenizer::_unwrap_optional_property_type!($name, parsed)
    }};
}
/// Pulls the next attribute from the iterator `$parts`, if there is one, and
/// type-checks its value as `$variant`.
///
/// Expands to `Some((name, payload))`, or to `None` when the iterator is
/// finished. Like `get_next_property!`, it expects `$parts` to yield `Result`
/// items: a tokenizer error or a value of another variant returns the
/// corresponding error from the *enclosing function*.
macro_rules! get_next_optional_property {
    ($parts:ident, $variant:ident) => {
        match $parts.next() {
            Some(Ok((name, value))) => {
                crate::response_tokenizer::_expect_property_type!({ Some(value) }, name, $variant)
                    .map(|value| (name, value))
            }
            Some(Err(e)) => return Err(e),
            None => None,
        }
    };
}
/// Pulls the next attribute from the iterator `$parts` and type-checks its
/// value as `$variant`, expanding to `(name, payload)`.
///
/// A finished iterator is reported as `UnexpectedEOF`; a tokenizer error or a
/// value of another variant returns the corresponding error from the
/// *enclosing function*. The presence check goes through
/// `_unwrap_optional_property_type!`, as in `get_next_and_parse_property!`,
/// so the expansion contains no panicking `unwrap()`.
macro_rules! get_next_property {
    ($parts:ident, $variant:ident) => {
        match $parts.next() {
            Some(Ok((name, value))) => {
                let prop = crate::response_tokenizer::_expect_property_type!(
                    { Some(value) },
                    name,
                    $variant
                );
                (
                    name,
                    crate::response_tokenizer::_unwrap_optional_property_type!(name, prop),
                )
            }
            Some(Err(e)) => return Err(e),
            None => return Err(crate::commands::ResponseParserError::UnexpectedEOF),
        }
    };
}
/// Pulls the next attribute from the iterator `$parts`, if there is one,
/// type-checks its value as `$variant`, and parses it.
///
/// Expands to `Some((name, parsed))`, or to `None` when the iterator is
/// finished. Like `get_next_and_parse_property!`, it expects `$parts` to
/// yield `Result` items: a tokenizer error, a wrong variant, or a failed
/// parse returns the corresponding error from the *enclosing function*.
macro_rules! get_next_and_parse_optional_property {
    ($parts:ident, $variant:ident) => {
        match $parts.next() {
            Some(Ok((name, value))) => {
                let prop = crate::response_tokenizer::_expect_property_type!(
                    { Some(value) },
                    name,
                    $variant
                );
                // Parse outside of any closure so that the `?` inside the
                // helper propagates from the enclosing function.
                let prop = crate::response_tokenizer::_parse_optional_property_type!(name, prop);
                prop.map(|value| (name, value))
            }
            Some(Err(e)) => return Err(e),
            None => None,
        }
    };
}
/// Pulls the next attribute from the iterator `$parts`, type-checks its value
/// as `$variant`, and parses it, expanding to `(name, parsed)`.
///
/// A finished iterator is reported as `UnexpectedEOF`; a tokenizer error, a
/// wrong variant, or a failed parse returns the corresponding error from the
/// *enclosing function*.
macro_rules! get_next_and_parse_property {
    ($parts:ident, $variant:ident) => {
        match $parts.next() {
            None => return Err(crate::commands::ResponseParserError::UnexpectedEOF),
            Some(Err(err)) => return Err(err),
            Some(Ok((name, value))) => {
                let typed = crate::response_tokenizer::_expect_property_type!(
                    { Some(value) },
                    name,
                    $variant
                );
                let parsed =
                    crate::response_tokenizer::_parse_optional_property_type!(name, typed);
                (
                    name,
                    crate::response_tokenizer::_unwrap_optional_property_type!(name, parsed),
                )
            }
        }
    };
}
use std::collections::HashMap;
pub(crate) use _expect_property_type;
pub(crate) use _parse_optional_property_type;
pub(crate) use _unwrap_optional_property_type;
pub(crate) use expect_property_type;
// pub(crate) use expect_optional_property_type;
pub(crate) use get_and_parse_optional_property;
pub(crate) use get_and_parse_property;
// pub(crate) use get_next_and_parse_optional_property;
pub(crate) use get_next_and_parse_property;
// pub(crate) use get_next_optional_property;
pub(crate) use get_next_property;
pub(crate) use get_optional_property;
pub(crate) use get_property;
use crate::commands::ResponseParserError;
// Unit tests for `ResponseAttributes` and its map/vector conversions.
#[cfg(test)]
mod tests {
use indoc::indoc;
use super::*;
// Keys are compared verbatim: "a", "A" and "A " (trailing space before the
// colon) are all distinct, so the map ends up with four entries.
#[test]
#[cfg(debug_assertions)]
fn test_valid_hashmap_uniqueness_assert() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"a: 1
A: 2
A : 3
b: 4
OK"
};
let attrs = ResponseAttributes::new(raw_response);
let map: HashMap<_, _> = attrs.into_map()?;
assert_eq!(map.len(), 4);
Ok(())
}
// The key "a" occurs twice, so `into_map` returns an error and the
// `unwrap()` panics.
#[test]
#[cfg(debug_assertions)]
#[should_panic]
fn test_invalid_hashmap_uniqueness_assert() {
let raw_response = indoc! {
"a: 1
b: 2
c: 3
a: 4
OK"
};
ResponseAttributes::new(raw_response).into_map().unwrap();
}
// A single `key: value` line followed by the terminator.
#[test]
fn test_response_attributes_single_attribute() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"name: Sticker1
OK"
};
let attrs = ResponseAttributes::new(raw_response);
let map: HashMap<_, _> = attrs.into_map()?;
assert_eq!(map.len(), 1);
assert_eq!(
map.get("name"),
Some(&GenericResponseValue::Text("Sticker1"))
);
Ok(())
}
// Several distinct keys; every value is kept as text, including "128".
#[test]
fn test_response_attributes_multiple_attributes() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"name: Sticker1
type: emoji
size: 128
OK"
};
let attrs = ResponseAttributes::new(raw_response);
let map: HashMap<_, _> = attrs.into_map()?;
assert_eq!(map.len(), 3);
assert_eq!(
map.get("name"),
Some(&GenericResponseValue::Text("Sticker1"))
);
assert_eq!(map.get("type"), Some(&GenericResponseValue::Text("emoji")));
assert_eq!(map.get("size"), Some(&GenericResponseValue::Text("128")));
Ok(())
}
// A response consisting only of the terminator yields no attributes.
#[test]
fn test_response_attributes_empty_response() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"OK"
};
let attrs = ResponseAttributes::new(raw_response);
let map: HashMap<_, _> = attrs.into_map()?;
assert_eq!(map.len(), 0);
Ok(())
}
// Input that ends without the "OK" terminator is reported as `UnexpectedEOF`.
#[test]
fn test_response_attributes_unexpected_eof() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"name: Sticker1
type: emoji"
};
let attrs = ResponseAttributes::new(raw_response);
let vec: Result<Vec<_>, ResponseParserError> = attrs.into_vec();
assert!(matches!(vec, Err(ResponseParserError::UnexpectedEOF)));
Ok(())
}
// `into_vec` keeps repeated keys, in order (unlike `into_map`).
#[test]
fn test_response_attributes_repeated_attribute() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"name: Sticker1
name: Sticker2
OK"
};
let attrs = ResponseAttributes::new(raw_response);
let vec = attrs.into_vec()?;
assert_eq!(vec.len(), 2);
assert_eq!(vec[0], ("name", GenericResponseValue::Text("Sticker1")));
assert_eq!(vec[1], ("name", GenericResponseValue::Text("Sticker2")));
Ok(())
}
// NOTE(review): the test name suggests the fixture should contain a blank
// line between attributes, but none is visible in this literal — confirm
// that it was not lost.
#[test]
fn test_response_attributes_empty_line() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"name: Sticker1
type: emoji
OK"
};
let attrs = ResponseAttributes::new(raw_response);
let map: HashMap<_, _> = attrs.into_map()?;
assert_eq!(map.len(), 2);
assert_eq!(
map.get("name"),
Some(&GenericResponseValue::Text("Sticker1"))
);
assert_eq!(map.get("type"), Some(&GenericResponseValue::Text("emoji")));
Ok(())
}
// A key followed by ':' and nothing else yields an empty text value.
#[test]
fn test_response_attributes_no_value() -> Result<(), ResponseParserError<'static>> {
let raw_response = indoc! {
"name:
type: emoji
OK"
};
let attrs = ResponseAttributes::new(raw_response);
let map: HashMap<_, _> = attrs.into_map()?;
assert_eq!(map.len(), 2);
assert_eq!(map.get("name"), Some(&GenericResponseValue::Text("")));
assert_eq!(map.get("type"), Some(&GenericResponseValue::Text("emoji")));
Ok(())
}
// A "binary: <len>" header followed by <len> raw bytes and a newline is
// exposed as a `Binary` value under the "binary" key.
#[test]
fn test_response_attributes_binary_data() -> Result<(), ResponseParserError<'static>> {
let bytestring: &[u8] = b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09";
let raw_response = {
let mut response = format!("binary: {}\n", bytestring.len()).into_bytes();
response.extend_from_slice(bytestring);
response.extend_from_slice(b"\nOK");
response
};
let attrs = ResponseAttributes::new_from_bytes(&raw_response);
let map: HashMap<_, _> = attrs.into_map().unwrap();
assert_eq!(map.len(), 1);
assert_eq!(
map.get("binary"),
Some(&GenericResponseValue::Binary(bytestring))
);
Ok(())
}
}