diff --git a/NEWS b/NEWS index 7d378490b..4ebb03969 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,5 @@ ver 0.23.5 (not yet released) +* migrate to PCRE2 * GCC 12 build fixes ver 0.23.4 (2021/11/11) diff --git a/src/db/plugins/meson.build b/src/db/plugins/meson.build index 8ea565348..a7a179f29 100644 --- a/src/db/plugins/meson.build +++ b/src/db/plugins/meson.build @@ -35,6 +35,7 @@ db_plugins = static_library( include_directories: inc, dependencies: [ upnp_dep, + pcre_dep, libmpdclient_dep, log_dep, ], diff --git a/src/lib/pcre/Error.cxx b/src/lib/pcre/Error.cxx new file mode 100644 index 000000000..24ec43770 --- /dev/null +++ b/src/lib/pcre/Error.cxx @@ -0,0 +1,49 @@ +/* + * Copyright 2007-2021 CM4all GmbH + * All rights reserved. + * + * author: Max Kellermann + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Error.hxx" + +#include + +namespace Pcre { + +ErrorCategory error_category; + +std::string +ErrorCategory::message(int condition) const +{ + PCRE2_UCHAR8 buffer[256]; + pcre2_get_error_message_8(condition, buffer, std::size(buffer)); + return std::string{(const char *)buffer}; +} + +} // namespace Pcre diff --git a/src/lib/pcre/Error.hxx b/src/lib/pcre/Error.hxx new file mode 100644 index 000000000..32b32c95a --- /dev/null +++ b/src/lib/pcre/Error.hxx @@ -0,0 +1,56 @@ +/* + * Copyright 2007-2021 CM4all GmbH + * All rights reserved. + * + * author: Max Kellermann + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
namespace Pcre {

/**
 * A std::error_category implementation for PCRE2 error codes.  Its
 * message() method is defined out of line.
 */
class ErrorCategory final : public std::error_category {
public:
	/**
	 * @return the fixed identifier of this category
	 */
	const char *name() const noexcept override {
		return "pcre2";
	}

	/**
	 * @param condition a PCRE2 error code
	 * @return the message text for the given code
	 */
	std::string message(int condition) const override;
};

/**
 * The category instance used by MakeError().
 */
extern ErrorCategory error_category;

/**
 * Build (but do not throw) a std::system_error for a PCRE2 error
 * code.
 *
 * @param error the PCRE2 error code
 * @param msg a message prefix passed on to std::system_error
 */
inline std::system_error
MakeError(int error, const char *msg) noexcept
{
	const std::error_code code{error, error_category};
	return std::system_error{code, msg};
}

} // namespace Pcre
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include + +#include +#include +#include +#include + +class MatchData { + friend class RegexPointer; + + pcre2_match_data_8 *match_data = nullptr; + const char *s; + PCRE2_SIZE *ovector; + std::size_t n; + + explicit MatchData(pcre2_match_data_8 *_md, const char *_s) noexcept + :match_data(_md), s(_s), + ovector(pcre2_get_ovector_pointer_8(match_data)) + { + } + +public: + MatchData() = default; + + MatchData(MatchData &&src) noexcept + :match_data(std::exchange(src.match_data, nullptr)), + s(src.s), ovector(src.ovector), n(src.n) {} + + ~MatchData() noexcept { + if (match_data != nullptr) + pcre2_match_data_free_8(match_data); + } + + MatchData &operator=(MatchData &&src) noexcept { + using std::swap; + swap(match_data, src.match_data); + swap(s, src.s); + swap(ovector, src.ovector); + swap(n, src.n); + return *this; + } + + constexpr operator bool() const noexcept { + return match_data != nullptr; + } + + constexpr std::size_t size() const noexcept { + assert(*this); + + return static_cast(n); + } + + [[gnu::pure]] + constexpr std::string_view operator[](std::size_t i) const noexcept { + assert(*this); + assert(i < size()); + + int start = ovector[2 * 
i]; + if (start < 0) + return {}; + + int end = ovector[2 * i + 1]; + assert(end >= start); + + return { s + start, std::size_t(end - start) }; + } + + static constexpr std::size_t npos = ~std::size_t{}; + + [[gnu::pure]] + constexpr std::size_t GetCaptureStart(std::size_t i) const noexcept { + assert(*this); + assert(i < size()); + + int start = ovector[2 * i]; + if (start < 0) + return npos; + + return std::size_t(start); + } + + [[gnu::pure]] + constexpr std::size_t GetCaptureEnd(std::size_t i) const noexcept { + assert(*this); + assert(i < size()); + + int end = ovector[2 * i + 1]; + if (end < 0) + return npos; + + return std::size_t(end); + } +}; diff --git a/src/lib/pcre/RegexPointer.hxx b/src/lib/pcre/RegexPointer.hxx index 00912b26e..0da4d6325 100644 --- a/src/lib/pcre/RegexPointer.hxx +++ b/src/lib/pcre/RegexPointer.hxx @@ -1,5 +1,5 @@ /* - * Copyright 2007-2018 Content Management AG + * Copyright 2007-2021 CM4all GmbH * All rights reserved. * * author: Max Kellermann @@ -30,27 +30,17 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef REGEX_POINTER_HXX -#define REGEX_POINTER_HXX +#pragma once -#include "util/StringView.hxx" -#include "util/Compiler.h" +#include "MatchData.hxx" -#include +#include -#include - -#if GCC_CHECK_VERSION(11,0) -#pragma GCC diagnostic push -/* bogus GCC 11 warning "ovector may be used uninitialized" in the - ovector.size() call */ -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif +#include class RegexPointer { protected: - pcre *re = nullptr; - pcre_extra *extra = nullptr; + pcre2_code_8 *re = nullptr; unsigned n_capture = 0; @@ -60,18 +50,28 @@ public: } [[gnu::pure]] - bool Match(StringView s) const noexcept { - /* we don't need the data written to ovector, but PCRE can - omit internal allocations if we pass a buffer to - pcre_exec() */ - std::array ovector; - return pcre_exec(re, extra, s.data, s.size, - 0, 0, &ovector.front(), ovector.size()) >= 0; + MatchData Match(std::string_view s) const noexcept { + MatchData match_data{ + pcre2_match_data_create_from_pattern_8(re, nullptr), + s.data(), + }; + + int n = pcre2_match_8(re, (PCRE2_SPTR8)s.data(), s.size(), + 0, 0, + match_data.match_data, nullptr); + if (n < 0) + /* no match (or error) */ + return {}; + + match_data.n = n; + + if (n_capture >= match_data.n) + /* in its return value, PCRE omits mismatching + optional captures if (and only if) they are + the last capture; this kludge works around + this */ + match_data.n = n_capture + 1; + + return match_data; } }; - -#if GCC_CHECK_VERSION(11,0) -#pragma GCC diagnostic pop -#endif - -#endif diff --git a/src/lib/pcre/UniqueRegex.cxx b/src/lib/pcre/UniqueRegex.cxx index 4028380ab..61e6e2629 100644 --- a/src/lib/pcre/UniqueRegex.cxx +++ b/src/lib/pcre/UniqueRegex.cxx @@ -1,5 +1,5 @@ /* - * Copyright 2007-2018 Content Management AG + * Copyright 2007-2021 CM4all GmbH * All rights reserved. 
* * author: Max Kellermann @@ -31,41 +31,40 @@ */ #include "UniqueRegex.hxx" -#include "util/RuntimeError.hxx" +#include "Error.hxx" + +#include void UniqueRegex::Compile(const char *pattern, bool anchored, bool capture, bool caseless) { - constexpr int default_options = PCRE_DOTALL|PCRE_NO_AUTO_CAPTURE|PCRE_UTF8; + constexpr int default_options = PCRE2_DOTALL|PCRE2_NO_AUTO_CAPTURE; - int options = default_options; + uint32_t options = default_options; if (anchored) - options |= PCRE_ANCHORED; + options |= PCRE2_ANCHORED; if (capture) - options &= ~PCRE_NO_AUTO_CAPTURE; + options &= ~PCRE2_NO_AUTO_CAPTURE; if (caseless) - options |= PCRE_CASELESS; + options |= PCRE2_CASELESS; - const char *error_string; - int error_offset; - re = pcre_compile(pattern, options, &error_string, &error_offset, nullptr); - if (re == nullptr) - throw FormatRuntimeError("Error in regex at offset %d: %s", - error_offset, error_string); - - int study_options = 0; -#ifdef PCRE_CONFIG_JIT - study_options |= PCRE_STUDY_JIT_COMPILE; -#endif - extra = pcre_study(re, study_options, &error_string); - if (extra == nullptr && error_string != nullptr) { - pcre_free(re); - re = nullptr; - throw FormatRuntimeError("Regex study error: %s", error_string); + int error_number; + PCRE2_SIZE error_offset; + re = pcre2_compile_8(PCRE2_SPTR8(pattern), + PCRE2_ZERO_TERMINATED, options, + &error_number, &error_offset, + nullptr); + if (re == nullptr) { + char msg[256]; + snprintf(msg, sizeof(msg), "Error in regex at offset %zu", + error_offset); + throw Pcre::MakeError(error_number, msg); } - int n; - if (capture && pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &n) == 0) + pcre2_jit_compile_8(re, PCRE2_JIT_COMPLETE); + + if (int n; capture && + pcre2_pattern_info_8(re, PCRE2_INFO_CAPTURECOUNT, &n) == 0) n_capture = n; } diff --git a/src/lib/pcre/UniqueRegex.hxx b/src/lib/pcre/UniqueRegex.hxx index 0d513bd86..1ee882086 100644 --- a/src/lib/pcre/UniqueRegex.hxx +++ b/src/lib/pcre/UniqueRegex.hxx @@ -1,5 +1,5 @@ 
/* - * Copyright 2007-2018 Content Management AG + * Copyright 2007-2021 CM4all GmbH * All rights reserved. * * author: Max Kellermann @@ -30,15 +30,12 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef UNIQUE_REGEX_HXX -#define UNIQUE_REGEX_HXX +#pragma once #include "RegexPointer.hxx" #include -#include - class UniqueRegex : public RegexPointer { public: UniqueRegex() = default; @@ -50,29 +47,22 @@ public: UniqueRegex(UniqueRegex &&src) noexcept:RegexPointer(src) { src.re = nullptr; - src.extra = nullptr; } ~UniqueRegex() noexcept { - pcre_free(re); -#ifdef PCRE_CONFIG_JIT - pcre_free_study(extra); -#else - pcre_free(extra); -#endif + if (re != nullptr) + pcre2_code_free_8(re); } - UniqueRegex &operator=(UniqueRegex &&src) { + UniqueRegex &operator=(UniqueRegex &&src) noexcept { using std::swap; swap(*this, src); return *this; } /** - * Throws std::runtime_error on error. + * Throws Pcre::Error on error. */ void Compile(const char *pattern, bool anchored, bool capture, bool caseless); }; - -#endif diff --git a/src/lib/pcre/meson.build b/src/lib/pcre/meson.build index 07e83a600..074c4dcad 100644 --- a/src/lib/pcre/meson.build +++ b/src/lib/pcre/meson.build @@ -1,11 +1,17 @@ -pcre_dep = dependency('libpcre', required: get_option('pcre')) +pcre_dep = dependency('libpcre2-8', required: get_option('pcre')) conf.set('HAVE_PCRE', pcre_dep.found()) if not pcre_dep.found() subdir_done() endif +pcre_dep = declare_dependency( + compile_args: '-DPCRE2_CODE_UNIT_WIDTH=0', + dependencies: pcre_dep, +) + pcre = static_library( 'pcre', + 'Error.cxx', 'UniqueRegex.cxx', include_directories: inc, dependencies: [