decoder/sidplay: Fix windows-1252 to utf-8 string conversion

High Voltage SID Collection (HVSC) metadata fields are encoded in
windows-1252, as described in DOCUMENTS/SID_file_format.txt:

https://www.hvsc.c64.org/download/C64Music/DOCUMENTS/SID_file_format.txt

If utf-8 transcoding fails, or the ICU library is unavailable, fall
back to plain ASCII and replace every non-printable or non-ASCII character with '?'.
This commit is contained in:
Fredrik Noring 2019-08-04 08:07:42 +02:00 committed by Max Kellermann
parent 0ed10542cc
commit 7723c481db
2 changed files with 46 additions and 17 deletions

1
NEWS
View File

@ -1,6 +1,7 @@
ver 0.21.14 (not yet released)
* decoder
- sidplay: show track durations in database
- sidplay: convert tag values from Windows-1252 charset
ver 0.21.13 (2019/08/06)
* input

View File

@ -25,6 +25,7 @@
#include "song/DetachedSong.hxx"
#include "fs/Path.hxx"
#include "fs/AllocatedPath.hxx"
#include "lib/icu/Converter.hxx"
#ifdef HAVE_SIDPLAYFP
#include "fs/io/FileReader.hxx"
#include "util/RuntimeError.hxx"
@ -32,6 +33,8 @@
#include "util/Macros.hxx"
#include "util/StringFormat.hxx"
#include "util/Domain.hxx"
#include "util/AllocatedString.hxx"
#include "util/CharUtil.hxx"
#include "system/ByteOrder.hxx"
#include "Log.hxx"
@ -432,19 +435,46 @@ sidplay_file_decode(DecoderClient &client, Path path_fs)
} while (cmd != DecoderCommand::STOP);
}
/**
 * Convert a Windows-1252 encoded C string to UTF-8.
 *
 * When HAVE_ICU_CONVERTER is defined, the conversion is attempted
 * with an IcuConverter created for the "windows-1252" charset.  If
 * that attempt throws, or if the converter is not compiled in, a
 * duplicate of the input is returned in which every character
 * rejected by IsPrintableASCII() has been overwritten with '?'.
 *
 * @param s the null-terminated input string
 * @return the converted (or sanitized) string
 */
static AllocatedString<char>
Windows1252ToUTF8(const char *s) noexcept
{
#ifdef HAVE_ICU_CONVERTER
	try {
		std::unique_ptr<IcuConverter>
			icu(IcuConverter::Create("windows-1252"));
		return icu->ToUTF8(s);
	} catch (...) {
		/* deliberately ignored: continue with the fallback below */
	}
#endif

	/*
	 * No transcoding took place.  Passing the raw windows-1252
	 * bytes through could yield invalid utf-8, so everything that
	 * is not printable ASCII is substituted.
	 */
	auto copy = AllocatedString<char>::Duplicate(s);

	size_t pos = 0;
	while (copy[pos] != AllocatedString<char>::SENTINEL) {
		if (!IsPrintableASCII(copy[pos]))
			copy[pos] = '?';
		++pos;
	}

	return copy;
}
gcc_pure
static const char *
static AllocatedString<char>
GetInfoString(const SidTuneInfo &info, unsigned i) noexcept
{
#ifdef HAVE_SIDPLAYFP
return info.numberOfInfoStrings() > i
const char *s = info.numberOfInfoStrings() > i
? info.infoString(i)
: nullptr;
: "";
#else
return info.numberOfInfoStrings > i
const char *s = info.numberOfInfoStrings > i
? info.infoString[i]
: nullptr;
: "";
#endif
return Windows1252ToUTF8(s);
}
static void
@ -452,27 +482,25 @@ ScanSidTuneInfo(const SidTuneInfo &info, unsigned track, unsigned n_tracks,
TagHandler &handler) noexcept
{
/* title */
const char *title = GetInfoString(info, 0);
if (title == nullptr)
title = "";
const auto title = GetInfoString(info, 0);
if (n_tracks > 1) {
const auto tag_title =
StringFormat<1024>("%s (%u/%u)",
title, track, n_tracks);
handler.OnTag(TAG_TITLE, tag_title);
title.c_str(), track, n_tracks);
handler.OnTag(TAG_TITLE, tag_title.c_str());
} else
handler.OnTag(TAG_TITLE, title);
handler.OnTag(TAG_TITLE, title.c_str());
/* artist */
const char *artist = GetInfoString(info, 1);
if (artist != nullptr)
handler.OnTag(TAG_ARTIST, artist);
const auto artist = GetInfoString(info, 1);
if (!artist.empty())
handler.OnTag(TAG_ARTIST, artist.c_str());
/* date */
const char *date = GetInfoString(info, 2);
if (date != nullptr)
handler.OnTag(TAG_DATE, date);
const auto date = GetInfoString(info, 2);
if (!date.empty())
handler.OnTag(TAG_DATE, date.c_str());
/* track */
handler.OnTag(TAG_TRACK, StringFormat<16>("%u", track));