tokenizer: convert to C++
This commit is contained in:
202
src/util/Tokenizer.cxx
Normal file
202
src/util/Tokenizer.cxx
Normal file
@@ -0,0 +1,202 @@
|
||||
/*
|
||||
* Copyright (C) 2003-2013 The Music Player Daemon Project
|
||||
* http://www.musicpd.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "Tokenizer.hxx"
|
||||
#include "string_util.h"
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
 * Returns the GQuark used as the error domain for every GError
 * raised by the Tokenizer methods in this file.
 */
G_GNUC_CONST
static GQuark
tokenizer_quark()
{
	const GQuark domain = g_quark_from_static_string("tokenizer");
	return domain;
}
|
||||
|
||||
static inline bool
|
||||
valid_word_first_char(char ch)
|
||||
{
|
||||
return g_ascii_isalpha(ch);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
valid_word_char(char ch)
|
||||
{
|
||||
return g_ascii_isalnum(ch) || ch == '_';
|
||||
}
|
||||
|
||||
char *
|
||||
Tokenizer::NextWord(GError **error_r)
|
||||
{
|
||||
char *const word = input;
|
||||
|
||||
if (*input == 0)
|
||||
return nullptr;
|
||||
|
||||
/* check the first character */
|
||||
|
||||
if (!valid_word_first_char(*input)) {
|
||||
g_set_error(error_r, tokenizer_quark(), 0,
|
||||
"Letter expected");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* now iterate over the other characters until we find a
|
||||
whitespace or end-of-string */
|
||||
|
||||
while (*++input != 0) {
|
||||
if (g_ascii_isspace(*input)) {
|
||||
/* a whitespace: the word ends here */
|
||||
*input = 0;
|
||||
/* skip all following spaces, too */
|
||||
input = strchug_fast(input + 1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!valid_word_char(*input)) {
|
||||
g_set_error(error_r, tokenizer_quark(), 0,
|
||||
"Invalid word character");
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/* end of string: the string is already null-terminated
|
||||
here */
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
/**
 * Is this character allowed in an unquoted parameter?  Quote
 * characters (single and double) are rejected, and so is every
 * byte whose unsigned value is 0x20 (space) or lower.
 */
static inline bool
valid_unquoted_char(char ch)
{
	if (ch == '"' || ch == '\'')
		return false;

	/* compare as unsigned so bytes >= 0x80 are accepted even
	   where plain char is signed */
	return static_cast<unsigned char>(ch) > 0x20;
}
|
||||
|
||||
char *
|
||||
Tokenizer::NextUnquoted(GError **error_r)
|
||||
{
|
||||
char *const word = input;
|
||||
|
||||
if (*input == 0)
|
||||
return nullptr;
|
||||
|
||||
/* check the first character */
|
||||
|
||||
if (!valid_unquoted_char(*input)) {
|
||||
g_set_error(error_r, tokenizer_quark(), 0,
|
||||
"Invalid unquoted character");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* now iterate over the other characters until we find a
|
||||
whitespace or end-of-string */
|
||||
|
||||
while (*++input != 0) {
|
||||
if (g_ascii_isspace(*input)) {
|
||||
/* a whitespace: the word ends here */
|
||||
*input = 0;
|
||||
/* skip all following spaces, too */
|
||||
input = strchug_fast(input + 1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!valid_unquoted_char(*input)) {
|
||||
g_set_error(error_r, tokenizer_quark(), 0,
|
||||
"Invalid unquoted character");
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/* end of string: the string is already null-terminated
|
||||
here */
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
char *
|
||||
Tokenizer::NextString(GError **error_r)
|
||||
{
|
||||
char *const word = input, *dest = input;
|
||||
|
||||
if (*input == 0)
|
||||
/* end of line */
|
||||
return nullptr;
|
||||
|
||||
/* check for the opening " */
|
||||
|
||||
if (*input != '"') {
|
||||
g_set_error(error_r, tokenizer_quark(), 0,
|
||||
"'\"' expected");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
++input;
|
||||
|
||||
/* copy all characters */
|
||||
|
||||
while (*input != '"') {
|
||||
if (*input == '\\')
|
||||
/* the backslash escapes the following
|
||||
character */
|
||||
++input;
|
||||
|
||||
if (*input == 0) {
|
||||
/* return input-1 so the caller can see the
|
||||
difference between "end of line" and
|
||||
"error" */
|
||||
--input;
|
||||
g_set_error(error_r, tokenizer_quark(), 0,
|
||||
"Missing closing '\"'");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* copy one character */
|
||||
*dest++ = *input++;
|
||||
}
|
||||
|
||||
/* the following character must be a whitespace (or end of
|
||||
line) */
|
||||
|
||||
++input;
|
||||
if (*input != 0 && !g_ascii_isspace(*input)) {
|
||||
g_set_error(error_r, tokenizer_quark(), 0,
|
||||
"Space expected after closing '\"'");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* finish the string and return it */
|
||||
|
||||
*dest = 0;
|
||||
input = strchug_fast(input);
|
||||
return word;
|
||||
}
|
||||
|
||||
char *
|
||||
Tokenizer::NextParam(GError **error_r)
|
||||
{
|
||||
if (*input == '"')
|
||||
return NextString(error_r);
|
||||
else
|
||||
return NextUnquoted(error_r);
|
||||
}
|
99
src/util/Tokenizer.hxx
Normal file
99
src/util/Tokenizer.hxx
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (C) 2003-2013 The Music Player Daemon Project
|
||||
* http://www.musicpd.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef MPD_TOKENIZER_HXX
|
||||
#define MPD_TOKENIZER_HXX
|
||||
|
||||
#include "gerror.h"
|
||||
|
||||
class Tokenizer {
|
||||
char *input;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @param _input the input string; the contents will be
|
||||
* modified by this class
|
||||
*/
|
||||
constexpr Tokenizer(char *_input):input(_input) {}
|
||||
|
||||
Tokenizer(const Tokenizer &) = delete;
|
||||
Tokenizer &operator=(const Tokenizer &) = delete;
|
||||
|
||||
char *Rest() {
|
||||
return input;
|
||||
}
|
||||
|
||||
char CurrentChar() const {
|
||||
return *input;
|
||||
}
|
||||
|
||||
bool IsEnd() const {
|
||||
return CurrentChar() == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the next word.
|
||||
*
|
||||
* @param error_r if this function returns nullptr and
|
||||
* **input_p!=0, it optionally provides a GError object in
|
||||
* this argument
|
||||
* @return a pointer to the null-terminated word, or nullptr
|
||||
* on error or end of line
|
||||
*/
|
||||
char *NextWord(GError **error_r);
|
||||
|
||||
/**
|
||||
* Reads the next unquoted word from the input string.
|
||||
*
|
||||
* @param error_r if this function returns nullptr and **input_p!=0, it
|
||||
* optionally provides a GError object in this argument
|
||||
* @return a pointer to the null-terminated word, or nullptr
|
||||
* on error or end of line
|
||||
*/
|
||||
char *NextUnquoted(GError **error_r);
|
||||
|
||||
/**
|
||||
* Reads the next quoted string from the input string. A backslash
|
||||
* escapes the following character. This function modifies the input
|
||||
* string.
|
||||
*
|
||||
* @param input_p the input string; this function returns a pointer to
|
||||
* the first non-whitespace character of the following token
|
||||
* @param error_r if this function returns nullptr and **input_p!=0, it
|
||||
* optionally provides a GError object in this argument
|
||||
* @return a pointer to the null-terminated string, or nullptr on error
|
||||
* or end of line
|
||||
*/
|
||||
char *NextString(GError **error_r);
|
||||
|
||||
/**
|
||||
* Reads the next unquoted word or quoted string from the
|
||||
* input. This is a wrapper for NextUnquoted() and
|
||||
* NextString().
|
||||
*
|
||||
* @param error_r if this function returns nullptr and
|
||||
* **input_p!=0, it optionally provides a GError object in
|
||||
* this argument
|
||||
* @return a pointer to the null-terminated string, or nullptr
|
||||
* on error or end of line
|
||||
*/
|
||||
char *NextParam(GError **error_r);
|
||||
};
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user