tokenizer: new library replacing buffer2array()

The new code is more robust and more flexible. It provides detailed error information in GError objects.
2009-07-19 15:11:36 +02:00
parent 809c96b53f
commit 1745e68795
3 changed files with 237 additions and 0 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -153,6 +153,7 @@ mpd_headers = \
 	src/tag_id3.h \
 	src/tag_print.h \
 	src/tag_save.h \
 	src/tokenizer.h \
 	src/strset.h \
 	src/uri.h \
 	src/utils.h \
@@ -249,6 +250,7 @@ src_mpd_SOURCES = \
 	src/tag_pool.c \
 	src/tag_print.c \
 	src/tag_save.c \
 	src/tokenizer.c \
 	src/strset.c \
 	src/uri.c \
 	src/utils.c \
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -0,0 +1,167 @@
 /*
 * Copyright (C) 2003-2009 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 #include "tokenizer.h"
 #include <stdbool.h>
 #include <assert.h>
 #include <string.h>
 /**
 * Returns the GQuark which is passed as the error domain to every
 * g_set_error() call in this library.
 */
 G_GNUC_CONST
 static GQuark
 tokenizer_quark(void)
 {
 	const GQuark domain = g_quark_from_static_string("tokenizer");
 	return domain;
 }
 /**
 * Checks whether the character may begin a word: only ASCII letters
 * are accepted.
 */
 static inline bool
 valid_word_first_char(char ch)
 {
 	const bool is_letter = g_ascii_isalpha(ch);
 	return is_letter;
 }
 /**
 * Checks whether the character may appear inside a word (after the
 * first character): ASCII letters, ASCII digits and the underscore
 * are accepted.
 */
 static inline bool
 valid_word_char(char ch)
 {
 	if (ch == '_')
 		return true;
 	return g_ascii_isalnum(ch);
 }
 /**
 * Reads the next word from the input string, in place.  A word
 * starts with an ASCII letter, continues with ASCII letters, digits
 * or underscores, and ends at the first whitespace character or at
 * the end of the string.  The whitespace character terminating the
 * word is overwritten with a null byte; nothing is allocated.
 *
 * @param input_p the input string; on success, it is advanced to the
 * first non-whitespace character after the word (or to the
 * terminating null byte); on "Invalid word character" it is set to
 * the offending character; otherwise it is left untouched
 * @param error_r location for a GError, set only when NULL is
 * returned because of a syntax error (not on end of line)
 * @return a pointer to the null-terminated word inside the input
 * buffer, or NULL on error or end of line
 */
 char *
 tokenizer_next_word(char **input_p, GError **error_r)
 {
 	char *word, *input;
 	assert(input_p != NULL);
 	assert(*input_p != NULL);
 	word = input = *input_p;
 	if (*input == 0)
 		/* end of line: not an error, no GError is set */
 		return NULL;
 	/* check the first character */
 	if (!valid_word_first_char(*input)) {
 		g_set_error(error_r, tokenizer_quark(), 0,
 			    "Letter expected");
 		return NULL;
 	}
 	/* now iterate over the other characters until we find a
 	   whitespace or end-of-string */
 	while (*++input != 0) {
 		if (g_ascii_isspace(*input)) {
 			/* a whitespace: the word ends here */
 			*input = 0;
 			/* skip all following spaces, too; only the
 			   pointer is advanced, because g_strchug()
 			   would move the whole rest of the line in
 			   memory for every single token */
 			++input;
 			while (g_ascii_isspace(*input))
 				++input;
 			break;
 		}
 		if (!valid_word_char(*input)) {
 			/* let the caller see where the bad character
 			   is */
 			*input_p = input;
 			g_set_error(error_r, tokenizer_quark(), 0,
 				    "Invalid word character");
 			return NULL;
 		}
 	}
 	/* end of string: the string is already null-terminated
 	   here */
 	*input_p = input;
 	return word;
 }
 /**
 * Reads the next double-quoted string from the input string, in
 * place.  A backslash escapes the following character.  The
 * enclosing quotes and the escaping backslashes are removed by
 * copying the payload towards the beginning of the token; nothing
 * is allocated.  The closing quote must be followed by whitespace or
 * by the end of the string.
 *
 * @param input_p the input string; on success, it is advanced to the
 * first non-whitespace character after the closing quote (or to the
 * terminating null byte); on "Missing closing" it is set to the
 * last character before the terminating null byte; on "Space
 * expected" it is set to the offending character; otherwise it is
 * left untouched
 * @param error_r location for a GError, set only when NULL is
 * returned because of a syntax error (not on end of line)
 * @return a pointer to the null-terminated, unescaped string inside
 * the input buffer, or NULL on error or end of line
 */
 char *
 tokenizer_next_string(char **input_p, GError **error_r)
 {
 	char *word, *dest, *input;
 	assert(input_p != NULL);
 	assert(*input_p != NULL);
 	word = dest = input = *input_p;
 	if (*input == 0)
 		/* end of line */
 		return NULL;
 	/* check for the opening " */
 	if (*input != '"') {
 		g_set_error(error_r, tokenizer_quark(), 0,
 			    "'\"' expected");
 		return NULL;
 	}
 	++input;
 	/* copy all characters; "dest" always stays behind "input",
 	   so the in-place copy never overwrites unread input */
 	while (*input != '"') {
 		if (*input == '\\')
 			/* the backslash escapes the following
 			   character */
 			++input;
 		if (*input == 0) {
 			/* return input-1 so the caller can see the
 			   difference between "end of line" and
 			   "error" */
 			*input_p = input - 1;
 			g_set_error(error_r, tokenizer_quark(), 0,
 				    "Missing closing '\"'");
 			return NULL;
 		}
 		/* copy one character */
 		*dest++ = *input++;
 	}
 	/* the following character must be a whitespace (or end of
 	   line) */
 	++input;
 	if (*input != 0 && !g_ascii_isspace(*input)) {
 		*input_p = input;
 		g_set_error(error_r, tokenizer_quark(), 0,
 			    "Space expected after closing '\"'");
 		return NULL;
 	}
 	/* finish the string and return it */
 	*dest = 0;
 	/* skip the whitespace before the next token; only the
 	   pointer is advanced, because g_strchug() would move the
 	   whole rest of the line in memory for every single token */
 	while (g_ascii_isspace(*input))
 		++input;
 	*input_p = input;
 	return word;
 }
 /**
 * Reads the next token, which may be either a plain word or a
 * quoted string.  The first input character selects the parser: a
 * double quote selects tokenizer_next_string(), anything else
 * (including end of line) is passed to tokenizer_next_word().
 *
 * @param input_p the input string, forwarded to the selected parser
 * @param error_r location for a GError, forwarded to the selected
 * parser
 * @return the return value of the selected parser
 */
 char *
 tokenizer_next_word_or_string(char **input_p, GError **error_r)
 {
 	assert(input_p != NULL);
 	assert(*input_p != NULL);
 	/* everything that does not start with a double quote is
 	   parsed as a plain word */
 	if (**input_p != '"')
 		return tokenizer_next_word(input_p, error_r);
 	return tokenizer_next_string(input_p, error_r);
 }
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -0,0 +1,68 @@
 /*
 * Copyright (C) 2003-2009 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 #ifndef MPD_TOKENIZER_H
 #define MPD_TOKENIZER_H
 #include <glib.h>
 /**
 * Reads the next word from the input string.  A word starts with an
 * ASCII letter, followed by any number of ASCII letters, digits or
 * underscores, and it is terminated by whitespace or by the end of
 * the string.  This function modifies the input string: the
 * whitespace character following the word is overwritten with a
 * null byte.
 *
 * @param input_p the input string; on success, this function stores
 * here a pointer to the first non-whitespace character of the
 * following token; if an invalid character is found inside the word,
 * it points to that character; if the first character is invalid,
 * it is left unchanged
 * @param error_r if this function returns NULL and **input_p!=0, it
 * optionally provides a GError object in this argument
 * @return a pointer to the null-terminated word (pointing into the
 * input string, nothing is allocated), or NULL on error or end of
 * line
 */
 char *
 tokenizer_next_word(char **input_p, GError **error_r);
 /**
 * Reads the next quoted string from the input string.  The string
 * must be enclosed in double quotes, and the closing quote must be
 * followed by whitespace or by the end of the line.  A backslash
 * escapes the following character.  This function modifies the input
 * string: the quotes and the escaping backslashes are removed in
 * place.
 *
 * @param input_p the input string; on success, this function stores
 * here a pointer to the first non-whitespace character of the
 * following token; if the closing quote is missing, it points to the
 * last character before the end of the line; if the closing quote
 * is not followed by whitespace, it points to the offending
 * character; if the opening quote is missing, it is left unchanged
 * @param error_r if this function returns NULL and **input_p!=0, it
 * optionally provides a GError object in this argument
 * @return a pointer to the null-terminated string (pointing into the
 * input string, nothing is allocated), or NULL on error or end of
 * line
 */
 char *
 tokenizer_next_string(char **input_p, GError **error_r);
 /**
 * Reads the next word or quoted string from the input.  This is a
 * wrapper for tokenizer_next_word() and tokenizer_next_string(): if
 * the input begins with a double quote, tokenizer_next_string() is
 * called, otherwise tokenizer_next_word().  Like those functions, it
 * modifies the input string.
 *
 * @param input_p the input string; this function returns a pointer to
 * the first non-whitespace character of the following token
 * @param error_r if this function returns NULL and **input_p!=0, it
 * optionally provides a GError object in this argument
 * @return a pointer to the null-terminated string, or NULL on error
 * or end of line
 */
 char *
 tokenizer_next_word_or_string(char **input_p, GError **error_r);
 #endif