use crate::commands::*; #[derive(Clone, Debug)] pub struct RequestTokenizer<'a> { pub(super) inner: &'a str, pub(super) cursor: usize, } impl<'a> RequestTokenizer<'a> { #[inline] pub fn new(input: &'a str) -> Self { Self { inner: input, cursor: 0, } } } #[inline] fn parse_command_name(input: &str) -> Option<&str> { // TODO: Create static trie of command names on startup for more efficient matching let mut sorted_commands = COMMAND_NAMES.to_vec(); sorted_commands.sort_by_key(|b| std::cmp::Reverse(b.len())); sorted_commands .into_iter() .find(|&command| input.starts_with(command)) } impl<'a> Iterator for RequestTokenizer<'a> { type Item = &'a str; // Split on strings and whitespace #[inline] fn next(&mut self) -> Option<&'a str> { #[inline] fn is_whitespace(byte: u8) -> bool { byte.is_ascii_whitespace() } let bytes = self.inner.as_bytes(); let len = bytes.len(); let should_parse_command = self.cursor == 0; // Skip leading whitespace while self.cursor < len && is_whitespace(bytes[self.cursor]) { self.cursor += 1; } if should_parse_command { if let Some(command) = parse_command_name(&self.inner[self.cursor..]) { self.cursor += command.len(); return Some(command); } else { return None; } } if self.cursor >= len { return None; } let start = self.cursor; if bytes[self.cursor] == b'"' { // Quoted string self.cursor += 1; // Skip opening quote while self.cursor < len { if bytes[self.cursor] == b'\\' && self.cursor + 1 < len && bytes[self.cursor + 1] == b'"' { self.cursor += 2; // Skip escaped quote continue; } if bytes[self.cursor] == b'"' { let end = self.cursor; self.cursor += 1; // Skip closing quote return Some(&self.inner[start + 1..end]); } self.cursor += 1; } // If we reach here, there was no closing quote Some(&self.inner[start + 1..]) } else { // Unquoted string while self.cursor < len && !is_whitespace(bytes[self.cursor]) { self.cursor += 1; } Some(&self.inner[start..self.cursor]) } } #[inline] fn size_hint(&self) -> (usize, Option) { (0, Some(self.inner.len())) } } #[cfg(test)] mod tests { use super::RequestTokenizer; #[test] fn test_request_tokenizer() { let input = r#"add arg1 "arg two" arg3 "arg four with spaces""#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), Some("add")); assert_eq!(tokenizer.next(), Some("arg1")); assert_eq!(tokenizer.next(), Some("arg two")); assert_eq!(tokenizer.next(), Some("arg3")); assert_eq!(tokenizer.next(), Some("arg four with spaces")); assert_eq!(tokenizer.next(), None); } #[test] fn test_request_tokenizer_invalid_command() { let input = r#"invalid_command arg1 "arg two" arg3 "arg four with spaces""#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), None); } #[test] fn test_request_tokenizer_unclosed_quote() { let input = r#"add arg1 "arg two arg3"#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), Some("add")); assert_eq!(tokenizer.next(), Some("arg1")); assert_eq!(tokenizer.next(), Some("arg two arg3")); assert_eq!(tokenizer.next(), None); } #[test] fn test_request_tokenizer_only_whitespace() { let input = r#" "#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), None); } #[test] fn test_request_tokenizer_escape_quotes() { let input = r#"add "arg with a \" quote" arg2"#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), Some("add")); assert_eq!(tokenizer.next(), Some(r#"arg with a \" quote"#)); assert_eq!(tokenizer.next(), Some("arg2")); assert_eq!(tokenizer.next(), None); } #[test] fn test_request_tokenizer_multitoken_command() { let input = r#"protocol all arg"#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), Some("protocol all")); assert_eq!(tokenizer.next(), Some("arg")); assert_eq!(tokenizer.next(), None); let input = r#"protocol arg"#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), Some("protocol")); assert_eq!(tokenizer.next(), Some("arg")); assert_eq!(tokenizer.next(), None); } // TODO: Implement ignoring spacing within command names #[test] #[ignore = "Currently, arbitrary spacing within commands names is not supported"] fn test_request_tokenizer_ignore_spacing_in_command() { let input = r#" protocol all arg "#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), Some("protocol all")); assert_eq!(tokenizer.next(), Some("arg")); assert_eq!(tokenizer.next(), None); let input = r#" protocol arg "#; let mut tokenizer = RequestTokenizer::new(input); assert_eq!(tokenizer.next(), Some("protocol")); assert_eq!(tokenizer.next(), Some("arg")); assert_eq!(tokenizer.next(), None); } }