tokenizer: convert to C++

2013-04-08 23:51:39 +02:00
parent 7ec1121cc8
commit 450c26c471
6 changed files with 163 additions and 166 deletions
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2003-2013 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+#include "Tokenizer.hxx"
+#include "string_util.h"
+
+#include <glib.h>
+
+#include <stdbool.h>
+#include <assert.h>
+#include <string.h>
+
+/**
+ * Returns the GQuark registered under the static string "tokenizer".
+ * The functions in this file pass it to g_set_error() as the error
+ * domain.
+ */
+G_GNUC_CONST
+static GQuark
+tokenizer_quark(void)
+{
+	const GQuark domain = g_quark_from_static_string("tokenizer");
+
+	return domain;
+}
+
+/**
+ * Is this character allowed at the very beginning of a word?  Only
+ * ASCII letters are.
+ */
+static inline bool
+valid_word_first_char(char ch)
+{
+	const bool is_letter = g_ascii_isalpha(ch);
+
+	return is_letter;
+}
+
+/**
+ * Is this character allowed inside a word (after the first
+ * character)?  ASCII letters, ASCII digits and the underscore are.
+ */
+static inline bool
+valid_word_char(char ch)
+{
+	if (ch == '_')
+		return true;
+
+	return g_ascii_isalnum(ch);
+}
+
+/**
+ * Consumes one word from the input: a letter followed by any number
+ * of letters, digits or underscores, terminated by whitespace or the
+ * end of the string.  The terminating whitespace character is
+ * overwritten with a null byte.
+ *
+ * Returns the word, or nullptr at end of input (error_r untouched)
+ * or on a syntax error (error_r set).
+ */
+char *
+Tokenizer::NextWord(GError **error_r)
+{
+	char *const start = input;
+
+	/* nothing left to read */
+	if (*start == 0)
+		return nullptr;
+
+	/* a word must begin with a letter */
+	if (!valid_word_first_char(*start)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Letter expected");
+		return nullptr;
+	}
+
+	/* walk over the remaining characters of the word */
+	for (++input; *input != 0; ++input) {
+		if (g_ascii_isspace(*input)) {
+			/* terminate the word at the whitespace and
+			   move on past any whitespace that follows */
+			*input = 0;
+			input = strchug_fast(input + 1);
+			return start;
+		}
+
+		if (!valid_word_char(*input)) {
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid word character");
+			return nullptr;
+		}
+	}
+
+	/* reached the end of the string; the word is already
+	   null-terminated by the input's own terminator */
+	return start;
+}
+
+/**
+ * Is this character allowed in an unquoted parameter?  Quote
+ * characters are rejected, and so is every byte whose unsigned value
+ * is 0x20 (space) or lower.
+ */
+static inline bool
+valid_unquoted_char(char ch)
+{
+	if (ch == '"' || ch == '\'')
+		return false;
+
+	return (unsigned char)ch > 0x20;
+}
+
+/**
+ * Consumes one unquoted parameter from the input: a run of
+ * characters accepted by valid_unquoted_char(), terminated by
+ * whitespace or the end of the string.  The terminating whitespace
+ * character is overwritten with a null byte.
+ *
+ * Returns the parameter, or nullptr at end of input (error_r
+ * untouched) or on a syntax error (error_r set).
+ */
+char *
+Tokenizer::NextUnquoted(GError **error_r)
+{
+	char *const start = input;
+
+	/* nothing left to read */
+	if (*start == 0)
+		return nullptr;
+
+	/* the first character is validated separately because
+	   whitespace is not acceptable in this position */
+	if (!valid_unquoted_char(*start)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Invalid unquoted character");
+		return nullptr;
+	}
+
+	/* walk over the remaining characters of the parameter */
+	for (++input; *input != 0; ++input) {
+		if (g_ascii_isspace(*input)) {
+			/* terminate the parameter at the whitespace
+			   and move on past any whitespace that
+			   follows */
+			*input = 0;
+			input = strchug_fast(input + 1);
+			return start;
+		}
+
+		if (!valid_unquoted_char(*input)) {
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid unquoted character");
+			return nullptr;
+		}
+	}
+
+	/* reached the end of the string; the parameter is already
+	   null-terminated by the input's own terminator */
+	return start;
+}
+
+/**
+ * Consumes one double-quoted string from the input.  A backslash
+ * makes the following character literal.  The unescaped contents
+ * are written back over the input buffer, starting at the position
+ * of the opening quote, and null-terminated there.
+ *
+ * Returns the unescaped string, or nullptr at end of input (error_r
+ * untouched) or on a syntax error (error_r set).
+ */
+char *
+Tokenizer::NextString(GError **error_r)
+{
+	char *const result = input;
+	char *out = input;
+
+	if (*input == 0)
+		/* end of line */
+		return nullptr;
+
+	/* a quoted string has to start with '"' */
+	if (*input != '"') {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "'\"' expected");
+		return nullptr;
+	}
+
+	/* step over the opening quote */
+	++input;
+
+	/* unescape in place; "out" always trails behind "input", so
+	   writing never clobbers a character that is still unread */
+	for (;;) {
+		char ch = *input;
+
+		if (ch == '"')
+			/* unescaped closing quote */
+			break;
+
+		if (ch == '\\')
+			/* take the character after the backslash
+			   literally */
+			ch = *++input;
+
+		if (ch == 0) {
+			/* step back to a non-null character so the
+			   caller can tell this error apart from a
+			   plain "end of line" */
+			--input;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Missing closing '\"'");
+			return nullptr;
+		}
+
+		*out++ = ch;
+		++input;
+	}
+
+	/* the closing quote must be followed by whitespace or by
+	   the end of the line */
+	const char after = *++input;
+	if (after != 0 && !g_ascii_isspace(after)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Space expected after closing '\"'");
+		return nullptr;
+	}
+
+	/* terminate the unescaped string and move on to the next
+	   token */
+	*out = 0;
+	input = strchug_fast(input);
+	return result;
+}
+
+/**
+ * Consumes one parameter from the input, dispatching on its first
+ * character: a '"' selects NextString(), anything else selects
+ * NextUnquoted().
+ */
+char *
+Tokenizer::NextParam(GError **error_r)
+{
+	return *input == '"'
+		? NextString(error_r)
+		: NextUnquoted(error_r);
+}
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2003-2013 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPD_TOKENIZER_HXX
+#define MPD_TOKENIZER_HXX
+
+#include "gerror.h"
+
+/**
+ * Splits a writable, null-terminated string into tokens.  Each
+ * Next*() method consumes one token from the front of the remaining
+ * input and returns a pointer into the original buffer; the buffer
+ * is modified in the process.
+ */
+class Tokenizer {
+	/** points to the part of the input that was not consumed yet */
+	char *input;
+
+public:
+	/**
+	 * @param _input the input string; the contents will be
+	 * modified by this class
+	 */
+	constexpr Tokenizer(char *_input):input(_input) {}
+
+	/* not copyable */
+	Tokenizer(const Tokenizer &) = delete;
+	Tokenizer &operator=(const Tokenizer &) = delete;
+
+	/**
+	 * Returns the part of the input that was not consumed yet.
+	 */
+	char *Rest() {
+		return input;
+	}
+
+	/**
+	 * Returns the first character of the remaining input, without
+	 * consuming it.
+	 */
+	char CurrentChar() const {
+		return *input;
+	}
+
+	/**
+	 * Has the null terminator of the input been reached?
+	 */
+	bool IsEnd() const {
+		return CurrentChar() == 0;
+	}
+
+	/**
+	 * Reads the next word.
+	 *
+	 * @param error_r if this function returns nullptr and the
+	 * end of the input was not reached (IsEnd() is false), it
+	 * optionally provides a GError object in this argument
+	 * @return a pointer to the null-terminated word, or nullptr
+	 * on error or end of line
+	 */
+	char *NextWord(GError **error_r);
+
+	/**
+	 * Reads the next unquoted word from the input string.
+	 *
+	 * @param error_r if this function returns nullptr and the
+	 * end of the input was not reached (IsEnd() is false), it
+	 * optionally provides a GError object in this argument
+	 * @return a pointer to the null-terminated word, or nullptr
+	 * on error or end of line
+	 */
+	char *NextUnquoted(GError **error_r);
+
+	/**
+	 * Reads the next quoted string from the input string.  A backslash
+	 * escapes the following character.  This function modifies the input
+	 * string.
+	 *
+	 * On success, the whitespace after the closing quote is
+	 * skipped as well, so the tokenizer is left at the following
+	 * token.
+	 *
+	 * @param error_r if this function returns nullptr and the
+	 * end of the input was not reached (IsEnd() is false), it
+	 * optionally provides a GError object in this argument
+	 * @return a pointer to the null-terminated string, or nullptr on error
+	 * or end of line
+	 */
+	char *NextString(GError **error_r);
+
+	/**
+	 * Reads the next unquoted word or quoted string from the
+	 * input.  This is a wrapper for NextUnquoted() and
+	 * NextString().
+	 *
+	 * @param error_r if this function returns nullptr and the
+	 * end of the input was not reached (IsEnd() is false), it
+	 * optionally provides a GError object in this argument
+	 * @return a pointer to the null-terminated string, or nullptr
+	 * on error or end of line
+	 */
+	char *NextParam(GError **error_r);
+};
+
+#endif