song/StringFilter: support regular expressions with "=~" and "!~"

This feature requires `libpcre`.
This commit is contained in:
Max Kellermann
2018-11-07 00:28:15 +01:00
parent fee75dc766
commit 72184dccfc
13 changed files with 294 additions and 1 deletions

View File

@@ -0,0 +1,66 @@
/*
* Copyright 2007-2018 Content Management AG
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef REGEX_POINTER_HXX
#define REGEX_POINTER_HXX
#include "util/StringView.hxx"
#include "util/Compiler.h"
#include <pcre.h>
#include <array>
/**
 * A plain handle to a compiled PCRE pattern.  This class only stores
 * the pointers and offers matching; it declares no destructor and
 * therefore never frees them.
 */
class RegexPointer {
protected:
	/** the compiled pattern; nullptr means "no pattern" */
	pcre *re = nullptr;

	/** auxiliary data handed to pcre_exec() together with #re */
	pcre_extra *extra = nullptr;

	/** capture counter; zero unless a derived class stores a value */
	unsigned n_capture = 0;

public:
	/**
	 * Does this object refer to a compiled pattern?
	 */
	constexpr bool IsDefined() const noexcept {
		return nullptr != re;
	}

	/**
	 * Run the pattern against the given string.
	 *
	 * @return true if pcre_exec() reports a non-negative result,
	 * false on any negative result
	 */
	gcc_pure
	bool Match(StringView s) const noexcept {
		/* the offsets PCRE stores here are never read; the
		   buffer exists only because handing one to
		   pcre_exec() allows it to skip internal
		   allocations */
		std::array<int, 16> workspace;

		const int result = pcre_exec(re, extra, s.data, s.size,
					     0, 0,
					     workspace.data(),
					     workspace.size());
		return result >= 0;
	}
};
#endif

View File

@@ -0,0 +1,71 @@
/*
* Copyright 2007-2018 Content Management AG
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UniqueRegex.hxx"
#include "util/RuntimeError.hxx"
/**
 * Compile a regular expression and store the result in this object.
 * A pattern left behind by an earlier call is released first, so the
 * method may be invoked repeatedly on the same instance without
 * leaking.
 *
 * @param pattern the null-terminated regular expression source
 * @param anchored add PCRE_ANCHORED to the compile options
 * @param capture drop PCRE_NO_AUTO_CAPTURE from the compile options
 * and record the pattern's capture count in #n_capture
 * @param caseless add PCRE_CASELESS to the compile options
 *
 * Throws (via FormatRuntimeError()) if pcre_compile() or pcre_study()
 * fails; in that case, this object holds no pattern afterwards.
 */
void
UniqueRegex::Compile(const char *pattern, bool anchored, bool capture,
		     bool caseless)
{
	/* dispose of the previous pattern (if any) before the members
	   are overwritten; this mirrors what the destructor does */
	pcre_free(re);
	re = nullptr;
#ifdef PCRE_CONFIG_JIT
	pcre_free_study(extra);
#else
	pcre_free(extra);
#endif
	extra = nullptr;
	n_capture = 0;

	constexpr int default_options = PCRE_DOTALL|PCRE_NO_AUTO_CAPTURE|PCRE_UTF8;

	int options = default_options;
	if (anchored)
		options |= PCRE_ANCHORED;
	if (capture)
		options &= ~PCRE_NO_AUTO_CAPTURE;
	if (caseless)
		options |= PCRE_CASELESS;

	const char *error_string;
	int error_offset;
	re = pcre_compile(pattern, options, &error_string, &error_offset, nullptr);
	if (re == nullptr)
		throw FormatRuntimeError("Error in regex at offset %d: %s",
					 error_offset, error_string);

	int study_options = 0;
#ifdef PCRE_CONFIG_JIT
	study_options |= PCRE_STUDY_JIT_COMPILE;
#endif

	extra = pcre_study(re, study_options, &error_string);
	/* a null result alone is not an error; only a non-null error
	   string indicates that studying has failed */
	if (extra == nullptr && error_string != nullptr) {
		pcre_free(re);
		re = nullptr;
		throw FormatRuntimeError("Regex study error: %s", error_string);
	}

	int n;
	if (capture && pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &n) == 0)
		n_capture = n;
}

View File

@@ -0,0 +1,79 @@
/*
* Copyright 2007-2018 Content Management AG
* All rights reserved.
*
* author: Max Kellermann <mk@cm4all.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UNIQUE_REGEX_HXX
#define UNIQUE_REGEX_HXX
#include "RegexPointer.hxx"
#include "util/StringView.hxx"
#include <algorithm>
#include <pcre.h>
/**
 * A #RegexPointer which owns its PCRE handles: the destructor frees
 * them.  Instances can be moved, but not copied.
 */
class UniqueRegex : public RegexPointer {
public:
	/**
	 * Construct an instance without a pattern.
	 */
	UniqueRegex() = default;

	/**
	 * Construct an instance by passing all parameters to
	 * Compile().  Whatever Compile() throws propagates to the
	 * caller.
	 */
	UniqueRegex(const char *pattern, bool anchored, bool capture,
		    bool caseless) {
		Compile(pattern, anchored, capture, caseless);
	}

	/**
	 * Take over the handles of #src and clear them there, so
	 * they are freed only once.
	 */
	UniqueRegex(UniqueRegex &&src) noexcept:RegexPointer(src) {
		src.re = nullptr;
		src.extra = nullptr;
	}

	/* copying would lead to the same handles being freed twice */
	UniqueRegex(const UniqueRegex &) = delete;
	UniqueRegex &operator=(const UniqueRegex &) = delete;

	~UniqueRegex() noexcept {
		pcre_free(re);
#ifdef PCRE_CONFIG_JIT
		pcre_free_study(extra);
#else
		pcre_free(extra);
#endif
	}

	/**
	 * Exchange the handles of the two objects; the pattern
	 * previously held by this object is freed when #src is
	 * destructed.  This swaps only pointers and an integer and
	 * thus cannot throw.
	 */
	UniqueRegex &operator=(UniqueRegex &&src) noexcept {
		using std::swap;
		swap<RegexPointer>(*this, src);
		return *this;
	}

	/**
	 * Compile the given pattern and store it in this object.
	 *
	 * Throws std::runtime_error on error.
	 */
	void Compile(const char *pattern, bool anchored, bool capture,
		     bool caseless);
};
#endif

21
src/lib/pcre/meson.build Normal file
View File

@@ -0,0 +1,21 @@
# Look up libpcre; the 'pcre' build option decides whether it is
# required.
pcre_dep = dependency('libpcre', required: get_option('pcre'))

# Publish the lookup result in the configuration data.
conf.set('HAVE_PCRE', pcre_dep.found())

# Nothing in this directory can be built without libpcre.
if not pcre_dep.found()
  subdir_done()
endif

# The wrapper library around libpcre.
pcre = static_library(
  'pcre',
  'UniqueRegex.cxx',
  include_directories: inc,
  dependencies: pcre_dep,
)

# Rebind pcre_dep so it links the wrapper library in addition to the
# external libpcre dependency.
pcre_dep = declare_dependency(
  link_with: pcre,
  dependencies: pcre_dep,
)