2023-03-06 14:42:04 +01:00
|
|
|
// SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
// author: Max Kellermann <max.kellermann@gmail.com>
|
2014-10-10 20:42:32 +02:00
|
|
|
|
|
|
|
#ifndef UTF8_HXX
|
|
|
|
#define UTF8_HXX
|
|
|
|
|
2020-03-13 00:46:28 +01:00
|
|
|
#include <cstddef>
|
2014-10-10 20:42:32 +02:00
|
|
|
|
|
|
|
/**
 * Check whether the given string is well-formed UTF-8.
 *
 * @param p the string to be checked (presumably null-terminated, as
 * no length is passed); must not be null
 * @return true if the string is valid UTF-8, false otherwise
 */
[[gnu::pure, gnu::nonnull]]
bool ValidateUTF8(const char *p) noexcept;
|
2014-10-10 20:42:32 +02:00
|
|
|
|
2014-10-10 21:17:40 +02:00
|
|
|
/**
 * Determine the length of a UTF-8 sequence from its first byte.
 *
 * @param ch the first byte of the sequence
 * @return the number of bytes in the sequence beginning with the
 * given character, or 0 if the character is not a valid start byte
 */
[[gnu::const]]
std::size_t SequenceLengthUTF8(char ch) noexcept;
|
2014-10-10 21:17:40 +02:00
|
|
|
|
|
|
|
/**
 * Determine the length of the first UTF-8 sequence in a string.
 *
 * @param p the string whose first sequence is examined
 * @return the number of bytes in the first sequence of the given
 * string, or 0 if that sequence is malformed
 */
[[gnu::pure]]
std::size_t SequenceLengthUTF8(const char *p) noexcept;
|
2014-10-10 21:17:40 +02:00
|
|
|
|
2014-10-10 20:42:32 +02:00
|
|
|
/**
 * Convert the specified string from ISO-8859-1 to UTF-8.
 *
 * This function is deliberately not declared gnu::pure: it stores the
 * converted text in the caller's buffer, which is a side effect that
 * pure functions must not have (the optimizer would otherwise be
 * allowed to merge or drop calls and to assume that the buffer
 * contents are unchanged).
 *
 * @param src the ISO-8859-1 source string; must not be null
 * @param buffer the destination buffer; must not be null
 * @param buffer_size the size of the destination buffer
 * @return the UTF-8 version of the source string; may return #src if
 * there are no non-ASCII characters; returns nullptr if the destination
 * buffer is too small
 */
[[gnu::nonnull]]
const char *
Latin1ToUTF8(const char *src, char *buffer, std::size_t buffer_size) noexcept;
|
2014-10-10 20:42:32 +02:00
|
|
|
|
|
|
|
/**
 * Encode a single Unicode character as UTF-8 and store the result in
 * the given buffer.
 *
 * @param ch the Unicode character to be converted
 * @param buffer the destination; must not be null and must have a
 * length of at least 6
 * @return a pointer to the buffer plus the added byte(s)
 */
[[gnu::nonnull]]
char *UnicodeToUTF8(unsigned ch, char *buffer) noexcept;
|
2014-10-10 20:42:32 +02:00
|
|
|
|
|
|
|
/**
 * Count the characters in a UTF-8 string.  Unlike strlen(), which
 * counts the number of bytes, this counts characters.
 *
 * @param p the string to be measured; must not be null
 * @return the number of characters in the string
 */
[[gnu::pure, gnu::nonnull]]
std::size_t LengthUTF8(const char *p) noexcept;
|
2014-10-10 20:42:32 +02:00
|
|
|
|
|
|
|
#endif
|