decoder/sidplay: Fix windows-1252 to utf-8 string conversion

High Voltage SID Collection (HVSC) metadata fields are encoded in
windows-1252, as described in DOCUMENTS/SID_file_format.txt:

https://www.hvsc.c64.org/download/C64Music/DOCUMENTS/SID_file_format.txt

If utf-8 transcoding fails, or the ICU library is unavailable, fall
back to plain ASCII and replace other characters with '?'.
This commit is contained in:
Fredrik Noring 2019-08-04 08:07:42 +02:00 committed by Max Kellermann
parent 0ed10542cc
commit 7723c481db
2 changed files with 46 additions and 17 deletions

1
NEWS
View File

@ -1,6 +1,7 @@
ver 0.21.14 (not yet released) ver 0.21.14 (not yet released)
* decoder * decoder
- sidplay: show track durations in database - sidplay: show track durations in database
- sidplay: convert tag values from Windows-1252 charset
ver 0.21.13 (2019/08/06) ver 0.21.13 (2019/08/06)
* input * input

View File

@ -25,6 +25,7 @@
#include "song/DetachedSong.hxx" #include "song/DetachedSong.hxx"
#include "fs/Path.hxx" #include "fs/Path.hxx"
#include "fs/AllocatedPath.hxx" #include "fs/AllocatedPath.hxx"
#include "lib/icu/Converter.hxx"
#ifdef HAVE_SIDPLAYFP #ifdef HAVE_SIDPLAYFP
#include "fs/io/FileReader.hxx" #include "fs/io/FileReader.hxx"
#include "util/RuntimeError.hxx" #include "util/RuntimeError.hxx"
@ -32,6 +33,8 @@
#include "util/Macros.hxx" #include "util/Macros.hxx"
#include "util/StringFormat.hxx" #include "util/StringFormat.hxx"
#include "util/Domain.hxx" #include "util/Domain.hxx"
#include "util/AllocatedString.hxx"
#include "util/CharUtil.hxx"
#include "system/ByteOrder.hxx" #include "system/ByteOrder.hxx"
#include "Log.hxx" #include "Log.hxx"
@ -432,19 +435,46 @@ sidplay_file_decode(DecoderClient &client, Path path_fs)
} while (cmd != DecoderCommand::STOP); } while (cmd != DecoderCommand::STOP);
} }
/**
 * Produce a UTF-8 copy of a Windows-1252 encoded, null-terminated
 * string.
 *
 * When HAVE_ICU_CONVERTER is defined, the conversion is attempted
 * through an IcuConverter created for the "windows-1252" charset.
 * Any exception raised on that path is swallowed on purpose, and
 * the function then continues with the fallback described below.
 *
 * Fallback (also the only path without HAVE_ICU_CONVERTER): the
 * input is duplicated verbatim and every character rejected by
 * IsPrintableASCII() is overwritten with '?'.
 *
 * @param s the null-terminated source string
 * @return a newly allocated string owned by the caller
 */
static AllocatedString<char>
Windows1252ToUTF8(const char *s) noexcept
{
#ifdef HAVE_ICU_CONVERTER
	try {
		std::unique_ptr<IcuConverter>
			icu(IcuConverter::Create("windows-1252"));
		return icu->ToUTF8(s);
	} catch (...) {
		/* best effort only: continue with the fallback below */
	}
#endif

	/*
	 * No transcoding is done here.  Copying the raw bytes could
	 * yield invalid UTF-8, so each character that is not printable
	 * ASCII gets replaced by a question mark.
	 */
	auto result = AllocatedString<char>::Duplicate(s);

	size_t pos = 0;
	while (result[pos] != AllocatedString<char>::SENTINEL) {
		if (!IsPrintableASCII(result[pos]))
			result[pos] = '?';
		++pos;
	}

	return result;
}
gcc_pure gcc_pure
static const char * static AllocatedString<char>
GetInfoString(const SidTuneInfo &info, unsigned i) noexcept GetInfoString(const SidTuneInfo &info, unsigned i) noexcept
{ {
#ifdef HAVE_SIDPLAYFP #ifdef HAVE_SIDPLAYFP
return info.numberOfInfoStrings() > i const char *s = info.numberOfInfoStrings() > i
? info.infoString(i) ? info.infoString(i)
: nullptr; : "";
#else #else
return info.numberOfInfoStrings > i const char *s = info.numberOfInfoStrings > i
? info.infoString[i] ? info.infoString[i]
: nullptr; : "";
#endif #endif
return Windows1252ToUTF8(s);
} }
static void static void
@ -452,27 +482,25 @@ ScanSidTuneInfo(const SidTuneInfo &info, unsigned track, unsigned n_tracks,
TagHandler &handler) noexcept TagHandler &handler) noexcept
{ {
/* title */ /* title */
const char *title = GetInfoString(info, 0); const auto title = GetInfoString(info, 0);
if (title == nullptr)
title = "";
if (n_tracks > 1) { if (n_tracks > 1) {
const auto tag_title = const auto tag_title =
StringFormat<1024>("%s (%u/%u)", StringFormat<1024>("%s (%u/%u)",
title, track, n_tracks); title.c_str(), track, n_tracks);
handler.OnTag(TAG_TITLE, tag_title); handler.OnTag(TAG_TITLE, tag_title.c_str());
} else } else
handler.OnTag(TAG_TITLE, title); handler.OnTag(TAG_TITLE, title.c_str());
/* artist */ /* artist */
const char *artist = GetInfoString(info, 1); const auto artist = GetInfoString(info, 1);
if (artist != nullptr) if (!artist.empty())
handler.OnTag(TAG_ARTIST, artist); handler.OnTag(TAG_ARTIST, artist.c_str());
/* date */ /* date */
const char *date = GetInfoString(info, 2); const auto date = GetInfoString(info, 2);
if (date != nullptr) if (!date.empty())
handler.OnTag(TAG_DATE, date); handler.OnTag(TAG_DATE, date.c_str());
/* track */ /* track */
handler.OnTag(TAG_TRACK, StringFormat<16>("%u", track)); handler.OnTag(TAG_TRACK, StringFormat<16>("%u", track));