tokenizer: new library replacing buffer2array()
The new code is more robust and more flexible. It provides detailed error information in GError objects.
This commit is contained in:
parent
809c96b53f
commit
1745e68795
|
@ -153,6 +153,7 @@ mpd_headers = \
|
||||||
src/tag_id3.h \
|
src/tag_id3.h \
|
||||||
src/tag_print.h \
|
src/tag_print.h \
|
||||||
src/tag_save.h \
|
src/tag_save.h \
|
||||||
|
src/tokenizer.h \
|
||||||
src/strset.h \
|
src/strset.h \
|
||||||
src/uri.h \
|
src/uri.h \
|
||||||
src/utils.h \
|
src/utils.h \
|
||||||
|
@ -249,6 +250,7 @@ src_mpd_SOURCES = \
|
||||||
src/tag_pool.c \
|
src/tag_pool.c \
|
||||||
src/tag_print.c \
|
src/tag_print.c \
|
||||||
src/tag_save.c \
|
src/tag_save.c \
|
||||||
|
src/tokenizer.c \
|
||||||
src/strset.c \
|
src/strset.c \
|
||||||
src/uri.c \
|
src/uri.c \
|
||||||
src/utils.c \
|
src/utils.c \
|
||||||
|
|
|
@ -0,0 +1,167 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2003-2009 The Music Player Daemon Project
|
||||||
|
* http://www.musicpd.org
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "tokenizer.h"
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
G_GNUC_CONST
|
||||||
|
static GQuark
|
||||||
|
tokenizer_quark(void)
|
||||||
|
{
|
||||||
|
return g_quark_from_static_string("tokenizer");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Checks whether the character may start a word: only ASCII letters
 * qualify.
 */
static inline bool
valid_word_first_char(char ch)
{
	const bool is_letter = g_ascii_isalpha(ch);

	return is_letter;
}
|
||||||
|
|
||||||
|
/**
 * Checks whether the character may appear inside a word (after the
 * first character): an underscore or an ASCII letter or digit.
 */
static inline bool
valid_word_char(char ch)
{
	if (ch == '_')
		return true;

	return g_ascii_isalnum(ch);
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
tokenizer_next_word(char **input_p, GError **error_r)
|
||||||
|
{
|
||||||
|
char *word, *input;
|
||||||
|
|
||||||
|
assert(input_p != NULL);
|
||||||
|
assert(*input_p != NULL);
|
||||||
|
|
||||||
|
word = input = *input_p;
|
||||||
|
|
||||||
|
if (*input == 0)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* check the first character */
|
||||||
|
|
||||||
|
if (!valid_word_first_char(*input)) {
|
||||||
|
g_set_error(error_r, tokenizer_quark(), 0,
|
||||||
|
"Letter expected");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* now iterate over the other characters until we find a
|
||||||
|
whitespace or end-of-string */
|
||||||
|
|
||||||
|
while (*++input != 0) {
|
||||||
|
if (g_ascii_isspace(*input)) {
|
||||||
|
/* a whitespace: the word ends here */
|
||||||
|
*input = 0;
|
||||||
|
/* skip all following spaces, too */
|
||||||
|
input = g_strchug(input + 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!valid_word_char(*input)) {
|
||||||
|
*input_p = input;
|
||||||
|
g_set_error(error_r, tokenizer_quark(), 0,
|
||||||
|
"Invalid word character");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* end of string: the string is already null-terminated
|
||||||
|
here */
|
||||||
|
|
||||||
|
*input_p = input;
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
tokenizer_next_string(char **input_p, GError **error_r)
|
||||||
|
{
|
||||||
|
char *word, *dest, *input;
|
||||||
|
|
||||||
|
assert(input_p != NULL);
|
||||||
|
assert(*input_p != NULL);
|
||||||
|
|
||||||
|
word = dest = input = *input_p;
|
||||||
|
|
||||||
|
if (*input == 0)
|
||||||
|
/* end of line */
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* check for the opening " */
|
||||||
|
|
||||||
|
if (*input != '"') {
|
||||||
|
g_set_error(error_r, tokenizer_quark(), 0,
|
||||||
|
"'\"' expected");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
++input;
|
||||||
|
|
||||||
|
/* copy all characters */
|
||||||
|
|
||||||
|
while (*input != '"') {
|
||||||
|
if (*input == '\\')
|
||||||
|
/* the backslash escapes the following
|
||||||
|
character */
|
||||||
|
++input;
|
||||||
|
|
||||||
|
if (*input == 0) {
|
||||||
|
/* return input-1 so the caller can see the
|
||||||
|
difference between "end of line" and
|
||||||
|
"error" */
|
||||||
|
*input_p = input - 1;
|
||||||
|
g_set_error(error_r, tokenizer_quark(), 0,
|
||||||
|
"Missing closing '\"'");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy one character */
|
||||||
|
*dest++ = *input++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the following character must be a whitespace (or end of
|
||||||
|
line) */
|
||||||
|
|
||||||
|
++input;
|
||||||
|
if (*input != 0 && !g_ascii_isspace(*input)) {
|
||||||
|
*input_p = input;
|
||||||
|
g_set_error(error_r, tokenizer_quark(), 0,
|
||||||
|
"Space expected after closing '\"'");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* finish the string and return it */
|
||||||
|
|
||||||
|
*dest = 0;
|
||||||
|
*input_p = g_strchug(input);
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
tokenizer_next_word_or_string(char **input_p, GError **error_r)
|
||||||
|
{
|
||||||
|
assert(input_p != NULL);
|
||||||
|
assert(*input_p != NULL);
|
||||||
|
|
||||||
|
if (**input_p == '"')
|
||||||
|
return tokenizer_next_string(input_p, error_r);
|
||||||
|
else
|
||||||
|
return tokenizer_next_word(input_p, error_r);
|
||||||
|
}
|
|
@ -0,0 +1,68 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2003-2009 The Music Player Daemon Project
|
||||||
|
* http://www.musicpd.org
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MPD_TOKENIZER_H
|
||||||
|
#define MPD_TOKENIZER_H
|
||||||
|
|
||||||
|
#include <glib.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the next word from the input string. This function modifies
|
||||||
|
* the input string.
|
||||||
|
*
|
||||||
|
* @param input_p pointer to the input cursor; on success it is advanced to
|
||||||
|
* the first non-whitespace character of the following token
|
||||||
|
* @param error_r if this function returns NULL and **input_p!=0, it
|
||||||
|
* optionally provides a GError object in this argument
|
||||||
|
* @return a pointer to the null-terminated word, or NULL on error or
|
||||||
|
* end of line
|
||||||
|
*/
|
||||||
|
char *
|
||||||
|
tokenizer_next_word(char **input_p, GError **error_r);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the next quoted string from the input string. A backslash
|
||||||
|
* escapes the following character. This function modifies the input
|
||||||
|
* string.
|
||||||
|
*
|
||||||
|
* @param input_p pointer to the input cursor; on success it is advanced to
|
||||||
|
* the first non-whitespace character of the following token
|
||||||
|
* @param error_r if this function returns NULL and **input_p!=0, it
|
||||||
|
* optionally provides a GError object in this argument
|
||||||
|
* @return a pointer to the null-terminated string, or NULL on error
|
||||||
|
* or end of line
|
||||||
|
*/
|
||||||
|
char *
|
||||||
|
tokenizer_next_string(char **input_p, GError **error_r);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the next word or quoted string from the input. This is a
|
||||||
|
* wrapper for tokenizer_next_word() and tokenizer_next_string().
|
||||||
|
*
|
||||||
|
* @param input_p pointer to the input cursor; on success it is advanced to
|
||||||
|
* the first non-whitespace character of the following token
|
||||||
|
* @param error_r if this function returns NULL and **input_p!=0, it
|
||||||
|
* optionally provides a GError object in this argument
|
||||||
|
* @return a pointer to the null-terminated string, or NULL on error
|
||||||
|
* or end of line
|
||||||
|
*/
|
||||||
|
char *
|
||||||
|
tokenizer_next_word_or_string(char **input_p, GError **error_r);
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue