//! Tokenizer that splits a textual request into a command name followed by
//! whitespace-separated and quoted arguments.
use crate::commands::*;
/// Streaming tokenizer over a request line.
///
/// The iterator implementation yields the command name first, then the
/// remaining whitespace-separated or double-quoted arguments. All yielded
/// tokens are slices borrowed from the original input; nothing is copied.
#[derive(Clone, Debug)]
pub struct RequestTokenizer<'a> {
    // Full request text being tokenized.
    pub(super) inner: &'a str,
    // Byte offset of the next unread position in `inner`.
    // `cursor == 0` means the command name has not been consumed yet.
    pub(super) cursor: usize,
}
impl<'a> RequestTokenizer<'a> {
|
|
#[inline]
|
|
pub fn new(input: &'a str) -> Self {
|
|
Self {
|
|
inner: input,
|
|
cursor: 0,
|
|
}
|
|
}
|
|
}
#[inline]
|
|
fn parse_command_name(input: &str) -> Option<&str> {
|
|
// TODO: Create static trie of command names on startup for more efficient matching
|
|
let mut sorted_commands = COMMAND_NAMES.to_vec();
|
|
sorted_commands.sort_by_key(|b| std::cmp::Reverse(b.len()));
|
|
sorted_commands
|
|
.into_iter()
|
|
.find(|&command| input.starts_with(command))
|
|
}
impl<'a> Iterator for RequestTokenizer<'a> {
    type Item = &'a str;

    // Split on strings and whitespace.
    //
    // The very first call (cursor == 0) parses the command name via
    // `parse_command_name`; if no registered command matches, the stream
    // ends immediately with `None`. Subsequent calls yield one argument per
    // call: either a double-quoted string (yielded without its surrounding
    // quotes, escape sequences left as-is) or a run of non-whitespace bytes.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        // ASCII-only whitespace test; operates on raw bytes of the input.
        #[inline]
        fn is_whitespace(byte: u8) -> bool {
            byte.is_ascii_whitespace()
        }

        let bytes = self.inner.as_bytes();
        let len = bytes.len();

        // Command parsing happens exactly once, before the cursor has moved.
        let should_parse_command = self.cursor == 0;

        // Skip leading whitespace
        while self.cursor < len && is_whitespace(bytes[self.cursor]) {
            self.cursor += 1;
        }

        if should_parse_command {
            if let Some(command) = parse_command_name(&self.inner[self.cursor..]) {
                // Advance past the matched command name; its length in bytes
                // equals its length in the input since it matched as a prefix.
                self.cursor += command.len();
                return Some(command);
            } else {
                // Unrecognized command: terminate the token stream.
                return None;
            }
        }

        if self.cursor >= len {
            return None;
        }

        let start = self.cursor;

        if bytes[self.cursor] == b'"' {
            // Quoted string
            self.cursor += 1; // Skip opening quote
            while self.cursor < len {
                // An escaped quote (`\"`) does not close the string; the
                // backslash is preserved in the yielded token (no unescaping).
                if bytes[self.cursor] == b'\\'
                    && self.cursor + 1 < len
                    && bytes[self.cursor + 1] == b'"'
                {
                    self.cursor += 2; // Skip escaped quote
                    continue;
                }
                if bytes[self.cursor] == b'"' {
                    let end = self.cursor;
                    self.cursor += 1; // Skip closing quote
                    // Slicing at `"` byte positions is always a valid char
                    // boundary because `"` is ASCII.
                    return Some(&self.inner[start + 1..end]);
                }
                self.cursor += 1;
            }
            // If we reach here, there was no closing quote
            // (yield the rest of the input as the token's content; the
            // cursor is already at `len`, so the next call returns `None`).
            Some(&self.inner[start + 1..])
        } else {
            // Unquoted string
            while self.cursor < len && !is_whitespace(bytes[self.cursor]) {
                self.cursor += 1;
            }
            Some(&self.inner[start..self.cursor])
        }
    }

    // Lower bound 0 (the stream may end immediately on an unknown command);
    // upper bound is the input's byte length, a safe over-estimate of the
    // number of tokens.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        (0, Some(self.inner.len()))
    }
}
#[cfg(test)]
mod tests {
    use super::RequestTokenizer;

    /// Drains a tokenizer over `input`, collecting every yielded token.
    fn tokens(input: &str) -> Vec<&str> {
        RequestTokenizer::new(input).collect()
    }

    #[test]
    fn test_request_tokenizer() {
        let input = r#"add arg1 "arg two" arg3 "arg four with spaces""#;
        assert_eq!(
            tokens(input),
            ["add", "arg1", "arg two", "arg3", "arg four with spaces"]
        );
    }

    #[test]
    fn test_request_tokenizer_invalid_command() {
        // An unrecognized command name terminates the stream immediately.
        let input = r#"invalid_command arg1 "arg two" arg3 "arg four with spaces""#;
        assert!(tokens(input).is_empty());
    }

    #[test]
    fn test_request_tokenizer_unclosed_quote() {
        // A quote that never closes yields the remainder of the input.
        let input = r#"add arg1 "arg two arg3"#;
        assert_eq!(tokens(input), ["add", "arg1", "arg two arg3"]);
    }

    #[test]
    fn test_request_tokenizer_only_whitespace() {
        assert!(tokens(r#" "#).is_empty());
    }

    #[test]
    fn test_request_tokenizer_escape_quotes() {
        // Escaped quotes are kept verbatim (no unescaping).
        let input = r#"add "arg with a \" quote" arg2"#;
        assert_eq!(tokens(input), ["add", r#"arg with a \" quote"#, "arg2"]);
    }

    #[test]
    fn test_request_tokenizer_multitoken_command() {
        // Longest command match wins over its shorter prefix.
        assert_eq!(tokens(r#"protocol all arg"#), ["protocol all", "arg"]);
        assert_eq!(tokens(r#"protocol arg"#), ["protocol", "arg"]);
    }

    // TODO: Implement ignoring spacing within command names
    #[test]
    #[ignore = "Currently, arbitrary spacing within commands names is not supported"]
    fn test_request_tokenizer_ignore_spacing_in_command() {
        assert_eq!(tokens(r#" protocol all arg "#), ["protocol all", "arg"]);
        assert_eq!(tokens(r#" protocol arg "#), ["protocol", "arg"]);
    }
}