From: epg@pretzelnet.org <> Date: Mon, 13 Jan 2014 02:58:01 +0000 (-0800) Subject: Rewrote and stop taglib from re-encoding as UTF-16. X-Git-Url: https://diplodocus.org/git/flac-archive/commitdiff_plain/cf500bdfc5fd120a9bfe8539c51a65694af08c74?ds=inline;hp=6b068d17c33b79554fdb3006d438c8a203812527 Rewrote and stop taglib from re-encoding as UTF-16. The rewrite wasn't actually necessary, but I ended up doing so while trying to figure out what was happening to the text frames. Finally figured out that taglib rudely writes out UTF-16 even though it read in latin1 and I didn't tell it to change anything. You have to tell it for each frame which encoding to use. --- diff --git a/strip-tags.cc b/strip-tags.cc index 610af68..2f18cd0 100644 --- a/strip-tags.cc +++ b/strip-tags.cc @@ -2,80 +2,103 @@ extern "C" { #include } -#include -#include -#include -#include +#include +#include +#include #include #include +#include +#include -// set::find didn't work...?! -class ByteVectorSet { -public: - ByteVectorSet(std::set<TagLib::ByteVector *> bv_set) - : set_(bv_set) - { } +using std::cerr; +using std::endl; +using std::shared_ptr; +using std::vector; - bool has(TagLib::ByteVector *bv) { - for (std::set<TagLib::ByteVector *>::const_iterator it = set_.begin(); - it != set_.end(); - ++it) { - if (**it == *bv) { - return true; - } - } - return false; - } +typedef vector<shared_ptr<TagLib::ByteVector>> ByteVectorSet; -private: - std::set<TagLib::ByteVector *> set_; -}; +static bool diag = true; +static bool force_write = false; +static bool strip_v1 = true; static bool -strip_frames(TagLib::MPEG::File &f, ByteVectorSet &preserve) +ByteVectorSet_has(const ByteVectorSet &set, + const TagLib::ByteVector &target) { - TagLib::ID3v2::Tag *tag = f.ID3v2Tag(); - if (tag == 0) { - return false; + for (const auto vector : set) { + if (*vector == target) { + return true; + } } + return false; +} - bool stripped = false; - TagLib::ID3v2::FrameList frames = tag->frameList(); - TagLib::ID3v2::FrameList::ConstIterator it; - for (it = frames.begin(); - it != frames.end(); - ++it) { - 
TagLib::ByteVector frameid = (*it)->frameID(); - uint frameid_size = frameid.size(); - char *frameid_s = new char[frameid_size + 1]; - memcpy(frameid_s, frameid.data(), frameid_size); - frameid_s[frameid_size] = '\0'; - printf("%s: %s ", frameid_s, (*it)->toString().toCString()); - if (preserve.has(&frameid)) { - puts("preserve"); +static bool +delete_id3v2_frames(TagLib::MPEG::File &file, + const ByteVectorSet &delete_frame_ids) +{ + TagLib::ID3v2::Tag *tag = file.ID3v2Tag(); + // Build a list of frames to delete in delete_frames. + vector<TagLib::ID3v2::Frame *> delete_frames; + for (auto *frame : tag->frameList()) { + TagLib::ByteVector frameid = frame->frameID(); + if (diag) { + cerr << frameid; + } + if (ByteVectorSet_has(delete_frame_ids, frameid)) { + cerr << " deleted" << endl; + delete_frames.push_back(frame); continue; } - puts("remove"); - delete[] frameid_s; - tag->removeFrame(*it, true); - stripped = true; + cerr << " kept" << endl; + // Taglib takes my latin1 text frames and writes them back as UTF-16. + // WTF mate! Force them back. + TagLib::ID3v2::TextIdentificationFrame *f = + dynamic_cast<TagLib::ID3v2::TextIdentificationFrame *>(frame); + if (f != nullptr) { + f->setTextEncoding(TagLib::String::Latin1); + } + } + for (auto *frame : delete_frames) { + tag->removeFrame(frame); + } + return !delete_frames.empty(); +} +static void +strip_tags(TagLib::MPEG::File &file, + const ByteVectorSet &delete_frames, + char *fn) +{ + bool changed = false; + if (!delete_frames.empty() && file.hasID3v2Tag()) { + changed = delete_id3v2_frames(file, delete_frames); + } + if (changed || (strip_v1 && file.hasID3v1Tag()) || force_write) { + if (!file.save(TagLib::MPEG::File::ID3v2, strip_v1)) { + // Their error handling really sucks; File::save just calls fwrite(), + // doesn't check its return value, and doesn't close or sync. + // File::save only returns false on higher-level classes of errors; + // the kinds of actual errors we'd see (permissions, disk full, NFS + // failure, and so on) aren't reported at all. 
+ cerr << "failed to save " << fn << "; blame taglib" << endl; + } } - return stripped; } int main(int argc, char **argv) { - std::set<TagLib::ByteVector *> preserve_frames; + ByteVectorSet delete_frames; int c; - while ((c = getopt(argc, argv, "p:")) != -1) { + while ((c = getopt(argc, argv, "d:")) != -1) { switch (c) { - case 'p': - preserve_frames.insert(new TagLib::ByteVector(optarg)); - break; + case 'd': { + shared_ptr<TagLib::ByteVector> bv(new TagLib::ByteVector(optarg)); + delete_frames.push_back(bv); + } break; default: // getopt printed an error. return 2; @@ -83,24 +106,18 @@ main(int argc, char **argv) } argc -= optind; argv += optind; - ByteVectorSet preserve_frame_set(preserve_frames); - + int bad_files = 0; for (int i = 0; i < argc; ++i) { - const char *mp3_path = argv[i]; - TagLib::MPEG::File f(mp3_path); - if (!strip_frames(f, preserve_frame_set)) { - continue; - } - errno = 0; - if (!f.save()) { - if (errno != 0) { - fprintf(stderr, "failed to save %s: %s\n", - mp3_path, strerror(errno)); - } else { - fprintf(stderr, "failed to save %s\n", mp3_path); + char *fn = argv[i]; + TagLib::MPEG::File f(fn); + if (!f.isValid()) { + if (diag) { + cerr << "failed to open " << fn << endl; } - return 3; + bad_files++; + continue; } + strip_tags(f, delete_frames, fn); } return 0;