2023-03-06 14:42:04 +01:00
|
|
|
// SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
// author: Max Kellermann <max.kellermann@gmail.com>
|
2019-08-09 15:54:13 +02:00
|
|
|
|
|
|
|
#include "UriExtract.hxx"
|
|
|
|
#include "CharUtil.hxx"
|
2022-07-04 14:59:37 +02:00
|
|
|
#include "StringSplit.hxx"
|
2019-08-09 15:54:13 +02:00
|
|
|
|
2022-07-04 14:59:37 +02:00
|
|
|
#include <cstring>
|
2019-08-09 15:54:13 +02:00
|
|
|
|
|
|
|
static constexpr bool
|
|
|
|
IsValidSchemeStart(char ch)
|
|
|
|
{
|
|
|
|
return IsLowerAlphaASCII(ch);
|
|
|
|
}
|
|
|
|
|
|
|
|
static constexpr bool
|
|
|
|
IsValidSchemeChar(char ch)
|
|
|
|
{
|
|
|
|
return IsLowerAlphaASCII(ch) || IsDigitASCII(ch) ||
|
|
|
|
ch == '+' || ch == '.' || ch == '-';
|
|
|
|
}
|
|
|
|
|
2021-10-13 11:28:04 +02:00
|
|
|
[[gnu::pure]]
|
2019-08-09 15:54:13 +02:00
|
|
|
static bool
|
2020-03-13 18:15:21 +01:00
|
|
|
IsValidScheme(std::string_view p) noexcept
|
2019-08-09 15:54:13 +02:00
|
|
|
{
|
|
|
|
if (p.empty() || !IsValidSchemeStart(p.front()))
|
|
|
|
return false;
|
|
|
|
|
2020-03-13 18:15:21 +01:00
|
|
|
for (size_t i = 1; i < p.size(); ++i)
|
2019-08-09 15:54:13 +02:00
|
|
|
if (!IsValidSchemeChar(p[i]))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the URI part after the scheme specification (and after the
|
|
|
|
* double slash).
|
|
|
|
*/
|
2021-10-13 11:28:04 +02:00
|
|
|
[[gnu::pure]]
|
2020-03-13 18:15:21 +01:00
|
|
|
static std::string_view
|
|
|
|
uri_after_scheme(std::string_view uri) noexcept
|
2019-08-09 15:54:13 +02:00
|
|
|
{
|
2020-03-13 18:15:21 +01:00
|
|
|
if (uri.length() > 2 &&
|
|
|
|
uri[0] == '/' && uri[1] == '/' && uri[2] != '/')
|
|
|
|
return uri.substr(2);
|
|
|
|
|
|
|
|
auto colon = uri.find(':');
|
|
|
|
if (colon == std::string_view::npos ||
|
|
|
|
!IsValidScheme(uri.substr(0, colon)))
|
|
|
|
return {};
|
|
|
|
|
|
|
|
uri = uri.substr(colon + 1);
|
|
|
|
if (uri[0] != '/' || uri[1] != '/')
|
|
|
|
return {};
|
|
|
|
|
|
|
|
return uri.substr(2);
|
2019-08-09 15:54:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2021-01-05 12:04:08 +01:00
|
|
|
uri_has_scheme(std::string_view uri) noexcept
|
2019-08-09 15:54:13 +02:00
|
|
|
{
|
2020-03-13 18:11:29 +01:00
|
|
|
return !uri_get_scheme(uri).empty();
|
2019-08-09 15:54:13 +02:00
|
|
|
}
|
|
|
|
|
2020-03-13 18:11:29 +01:00
|
|
|
/**
 * Return everything preceding the first "://" in the given string.
 * The returned text is not validated in any way.
 *
 * @return the part before the first "://", or a default-constructed
 * std::string_view (data()==nullptr) if the string contains no "://"
 */
std::string_view
uri_get_scheme(std::string_view uri) noexcept
{
	constexpr std::string_view separator{"://"};

	const auto separator_pos = uri.find(separator);
	return separator_pos != std::string_view::npos
		? uri.substr(0, separator_pos)
		: std::string_view{};
}
|
|
|
|
|
2019-09-07 23:57:33 +02:00
|
|
|
bool
|
|
|
|
uri_is_relative_path(const char *uri) noexcept
|
|
|
|
{
|
|
|
|
return !uri_has_scheme(uri) && *uri != '/';
|
|
|
|
}
|
|
|
|
|
2020-03-13 18:15:21 +01:00
|
|
|
std::string_view
|
2020-03-13 18:56:49 +01:00
|
|
|
uri_get_path_query_fragment(std::string_view uri) noexcept
|
2019-08-09 15:54:13 +02:00
|
|
|
{
|
2020-03-13 18:15:21 +01:00
|
|
|
auto ap = uri_after_scheme(uri);
|
|
|
|
if (ap.data() != nullptr) {
|
|
|
|
auto slash = ap.find('/');
|
|
|
|
if (slash == std::string_view::npos)
|
|
|
|
return {};
|
|
|
|
return ap.substr(slash);
|
|
|
|
}
|
2019-08-09 15:54:13 +02:00
|
|
|
|
|
|
|
return uri;
|
|
|
|
}
|
|
|
|
|
2021-10-13 11:28:04 +02:00
|
|
|
[[gnu::pure]]
|
2022-07-04 14:59:37 +02:00
|
|
|
static std::string_view
|
|
|
|
UriWithoutQueryString(std::string_view uri) noexcept
|
2020-11-04 21:14:34 +01:00
|
|
|
{
|
2022-07-04 14:59:37 +02:00
|
|
|
return Split(uri, '?').first;
|
2020-11-04 21:14:34 +01:00
|
|
|
}
|
|
|
|
|
2020-03-13 18:56:49 +01:00
|
|
|
std::string_view
|
|
|
|
uri_get_path(std::string_view uri) noexcept
|
|
|
|
{
|
|
|
|
auto path = uri_get_path_query_fragment(uri);
|
|
|
|
if (path.data() == nullptr || path.data() == uri.data())
|
|
|
|
/* preserve query and fragment if this URI doesn't
|
|
|
|
have a scheme; the question mark may be part of the
|
|
|
|
file name, after all */
|
|
|
|
return path;
|
|
|
|
|
|
|
|
auto end = path.find('?');
|
|
|
|
if (end == std::string_view::npos)
|
|
|
|
end = path.find('#');
|
|
|
|
|
|
|
|
return path.substr(0, end);
|
|
|
|
}
|
|
|
|
|
2019-08-09 15:54:13 +02:00
|
|
|
/* suffixes should be ascii only characters */
|
2020-11-04 20:39:06 +01:00
|
|
|
std::string_view
|
2020-11-04 21:10:58 +01:00
|
|
|
uri_get_suffix(std::string_view _uri) noexcept
|
2019-08-09 15:54:13 +02:00
|
|
|
{
|
2020-11-04 21:14:34 +01:00
|
|
|
const auto uri = UriWithoutQueryString(_uri);
|
|
|
|
|
2022-07-04 14:59:37 +02:00
|
|
|
const auto dot = uri.rfind('.');
|
|
|
|
if (dot == uri.npos || dot == 0 ||
|
|
|
|
uri[dot - 1] == '/' || uri[dot - 1] == '\\')
|
2020-11-04 20:39:06 +01:00
|
|
|
return {};
|
2019-08-09 15:54:13 +02:00
|
|
|
|
2020-11-04 21:10:58 +01:00
|
|
|
auto suffix = uri.substr(dot + 1);
|
2022-07-04 14:59:37 +02:00
|
|
|
if (suffix.find('/') != suffix.npos ||
|
|
|
|
suffix.find('\\') != suffix.npos)
|
2020-11-04 21:10:58 +01:00
|
|
|
/* this was not the last path segment */
|
2020-11-04 20:39:06 +01:00
|
|
|
return {};
|
2019-08-09 15:54:13 +02:00
|
|
|
|
2020-11-04 21:14:34 +01:00
|
|
|
return suffix;
|
2019-08-09 15:54:13 +02:00
|
|
|
}
|
2019-08-09 16:14:17 +02:00
|
|
|
|
|
|
|
/**
 * Locate the fragment of the given null-terminated URI string.
 *
 * @return a pointer to the character following the first '#' (which
 * may be the null terminator), or nullptr if the string contains no
 * '#'
 */
const char *
uri_get_fragment(const char *uri) noexcept
{
	const char *const hash = std::strchr(uri, '#');
	return hash != nullptr
		? hash + 1
		: nullptr;
}
|