2023-03-06 14:42:04 +01:00
|
|
|
// SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
// author: Max Kellermann <max.kellermann@gmail.com>
|
2009-07-19 15:11:36 +02:00
|
|
|
|
2013-04-08 23:51:39 +02:00
|
|
|
#include "Tokenizer.hxx"
|
2013-10-19 15:25:32 +02:00
|
|
|
#include "CharUtil.hxx"
|
2017-07-05 17:20:02 +02:00
|
|
|
#include "StringStrip.hxx"
|
2009-07-19 15:11:36 +02:00
|
|
|
|
2015-12-16 11:34:26 +01:00
|
|
|
#include <stdexcept>
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
static inline bool
|
|
|
|
valid_word_first_char(char ch)
|
|
|
|
{
|
2013-10-19 15:25:32 +02:00
|
|
|
return IsAlphaASCII(ch);
|
2009-07-19 15:11:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
valid_word_char(char ch)
|
|
|
|
{
|
2013-10-19 15:25:32 +02:00
|
|
|
return IsAlphaNumericASCII(ch) || ch == '_';
|
2009-07-19 15:11:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
char *
|
2015-12-16 11:34:26 +01:00
|
|
|
Tokenizer::NextWord()
|
2009-07-19 15:11:36 +02:00
|
|
|
{
|
2013-04-08 23:51:39 +02:00
|
|
|
char *const word = input;
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
if (*input == 0)
|
2013-04-08 23:51:39 +02:00
|
|
|
return nullptr;
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
/* check the first character */
|
|
|
|
|
2015-12-16 11:34:26 +01:00
|
|
|
if (!valid_word_first_char(*input))
|
|
|
|
throw std::runtime_error("Letter expected");
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
/* now iterate over the other characters until we find a
|
|
|
|
whitespace or end-of-string */
|
|
|
|
|
|
|
|
while (*++input != 0) {
|
2014-08-07 15:05:27 +02:00
|
|
|
if (IsWhitespaceFast(*input)) {
|
2009-07-19 15:11:36 +02:00
|
|
|
/* a whitespace: the word ends here */
|
|
|
|
*input = 0;
|
|
|
|
/* skip all following spaces, too */
|
2014-08-07 14:53:07 +02:00
|
|
|
input = StripLeft(input + 1);
|
2009-07-19 15:11:36 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-12-16 11:34:26 +01:00
|
|
|
if (!valid_word_char(*input))
|
|
|
|
throw std::runtime_error("Invalid word character");
|
2009-07-19 15:11:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* end of string: the string is already null-terminated
|
|
|
|
here */
|
|
|
|
|
|
|
|
return word;
|
|
|
|
}
|
|
|
|
|
2009-09-25 00:53:15 +02:00
|
|
|
/**
 * Is this character allowed in an unquoted parameter?
 *
 * Rejected are the null byte, everything up to and including the
 * space character (0x20), and both quote characters.  The
 * comparison is done on the unsigned value so that bytes with the
 * high bit set are accepted even where plain "char" is signed.
 *
 * @param ch the character to check
 * @return true if the character may appear in an unquoted value
 */
static inline bool
valid_unquoted_char(char ch)
{
	return static_cast<unsigned char>(ch) > 0x20 &&
		ch != '"' && ch != '\'';
}
|
|
|
|
|
|
|
|
char *
|
2015-12-16 11:34:26 +01:00
|
|
|
Tokenizer::NextUnquoted()
|
2009-09-25 00:53:15 +02:00
|
|
|
{
|
2013-04-08 23:51:39 +02:00
|
|
|
char *const word = input;
|
2009-09-25 00:53:15 +02:00
|
|
|
|
|
|
|
if (*input == 0)
|
2013-04-08 23:51:39 +02:00
|
|
|
return nullptr;
|
2009-09-25 00:53:15 +02:00
|
|
|
|
|
|
|
/* check the first character */
|
|
|
|
|
2015-12-16 11:34:26 +01:00
|
|
|
if (!valid_unquoted_char(*input))
|
|
|
|
throw std::runtime_error("Invalid unquoted character");
|
2009-09-25 00:53:15 +02:00
|
|
|
|
|
|
|
/* now iterate over the other characters until we find a
|
|
|
|
whitespace or end-of-string */
|
|
|
|
|
|
|
|
while (*++input != 0) {
|
2014-08-07 15:05:27 +02:00
|
|
|
if (IsWhitespaceFast(*input)) {
|
2009-09-25 00:53:15 +02:00
|
|
|
/* a whitespace: the word ends here */
|
|
|
|
*input = 0;
|
|
|
|
/* skip all following spaces, too */
|
2014-08-07 14:53:07 +02:00
|
|
|
input = StripLeft(input + 1);
|
2009-09-25 00:53:15 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-12-16 11:34:26 +01:00
|
|
|
if (!valid_unquoted_char(*input))
|
|
|
|
throw std::runtime_error("Invalid unquoted character");
|
2009-09-25 00:53:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* end of string: the string is already null-terminated
|
|
|
|
here */
|
|
|
|
|
|
|
|
return word;
|
|
|
|
}
|
|
|
|
|
2009-07-19 15:11:36 +02:00
|
|
|
char *
|
2015-12-16 11:34:26 +01:00
|
|
|
Tokenizer::NextString()
|
2009-07-19 15:11:36 +02:00
|
|
|
{
|
2013-04-08 23:51:39 +02:00
|
|
|
char *const word = input, *dest = input;
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
if (*input == 0)
|
|
|
|
/* end of line */
|
2013-04-08 23:51:39 +02:00
|
|
|
return nullptr;
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
/* check for the opening " */
|
|
|
|
|
2015-12-16 11:34:26 +01:00
|
|
|
if (*input != '"')
|
|
|
|
throw std::runtime_error("'\"' expected");
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
++input;
|
|
|
|
|
|
|
|
/* copy all characters */
|
|
|
|
|
|
|
|
while (*input != '"') {
|
|
|
|
if (*input == '\\')
|
|
|
|
/* the backslash escapes the following
|
|
|
|
character */
|
|
|
|
++input;
|
|
|
|
|
2015-12-16 11:34:26 +01:00
|
|
|
if (*input == 0)
|
|
|
|
throw std::runtime_error("Missing closing '\"'");
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
/* copy one character */
|
|
|
|
*dest++ = *input++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* the following character must be a whitespace (or end of
|
|
|
|
line) */
|
|
|
|
|
|
|
|
++input;
|
2015-12-16 11:34:26 +01:00
|
|
|
if (!IsWhitespaceFast(*input))
|
|
|
|
throw std::runtime_error("Space expected after closing '\"'");
|
2009-07-19 15:11:36 +02:00
|
|
|
|
|
|
|
/* finish the string and return it */
|
|
|
|
|
|
|
|
*dest = 0;
|
2014-08-07 14:53:07 +02:00
|
|
|
input = StripLeft(input);
|
2009-07-19 15:11:36 +02:00
|
|
|
return word;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *
|
2015-12-16 11:34:26 +01:00
|
|
|
Tokenizer::NextParam()
|
2009-07-19 15:11:36 +02:00
|
|
|
{
|
2013-04-08 23:51:39 +02:00
|
|
|
if (*input == '"')
|
2015-12-16 11:34:26 +01:00
|
|
|
return NextString();
|
2009-07-19 15:11:36 +02:00
|
|
|
else
|
2015-12-16 11:34:26 +01:00
|
|
|
return NextUnquoted();
|
2009-07-19 15:11:36 +02:00
|
|
|
}
|