summaryrefslogtreecommitdiff
path: root/gst/id3tag
diff options
context:
space:
mode:
authorMichael Smith <msmith@songbirdnest.com>2009-09-10 12:12:26 -0700
committerMichael Smith <msmith@songbirdnest.com>2009-09-10 12:14:28 -0700
commitaf6ea56cce207a432728ac6b9d6a31d60d1595cd (patch)
treebb71b645f57d76425b29ac4dcf45db60e967df00 /gst/id3tag
parent29c3542dd5ed0471245f02c13e9e2a63399f1ad8 (diff)
downloadgstreamer-plugins-bad-af6ea56cce207a432728ac6b9d6a31d60d1595cd.tar.gz
id3tag: When writing id3v2.3, do not use UTF-8.
UTF-8 is only permitted in v2.4. So instead use ISO-8859-1 for ascii-only strings, and UTF16 otherwise. Also, do not null terminate strings in text frames, except where required. These two allow windows media player to play (and correctly read tags) files created by id3mux.
Diffstat (limited to 'gst/id3tag')
-rw-r--r--gst/id3tag/id3tag.c128
1 files changed, 100 insertions, 28 deletions
diff --git a/gst/id3tag/id3tag.c b/gst/id3tag/id3tag.c
index 43c7635a3..7af74fc65 100644
--- a/gst/id3tag/id3tag.c
+++ b/gst/id3tag/id3tag.c
@@ -142,7 +142,9 @@ typedef struct
typedef void (*GstId3v2AddTagFunc) (GstId3v2Tag * tag, const GstTagList * list,
const gchar * gst_tag, guint num_tags, const gchar * data);
-#define ID3V2_ENCODING_UTF8 0x03
+#define ID3V2_ENCODING_ISO_8859_1 0x00
+#define ID3V2_ENCODING_UTF16_BOM 0x01
+#define ID3V2_ENCODING_UTF8 0x03
static gboolean id3v2_tag_init (GstId3v2Tag * tag, guint major_version);
static void id3v2_tag_unset (GstId3v2Tag * tag);
@@ -320,12 +322,76 @@ id3v2_frame_unset (GstId3v2Frame * frame)
memset (frame, 0, sizeof (GstId3v2Frame));
}
+static gboolean
+id3v2_string_is_ascii (const gchar * string)
+{
+ while (*string) {
+ if (!g_ascii_isprint (*string++))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static int
+id3v2_tag_string_encoding (GstId3v2Tag * tag, const gchar * string)
+{
+ int encoding;
+ if (tag->major_version == 4) {
+ /* ID3v2.4 supports UTF8, use it unconditionally as it's really the only
+ sensible encoding. */
+ encoding = ID3V2_ENCODING_UTF8;
+ } else {
+ /* If we're not writing v2.4, then check to see if it's ASCII.
+ If it is, write ISO-8859-1 (compatible with ASCII).
+ Otherwise, write UTF-16-LE with a byte order marker.
+ Note that we don't write arbitrary ISO-8859-1 as ISO-8859-1, because much
+ software misuses this - and non-ASCII might confuse it. */
+ if (id3v2_string_is_ascii (string))
+ encoding = ID3V2_ENCODING_ISO_8859_1;
+ else
+ encoding = ID3V2_ENCODING_UTF16_BOM;
+ }
+
+ return encoding;
+}
+
+static void
+id3v2_frame_write_string (GstId3v2Frame * frame, int encoding,
+ const gchar * string, gboolean null_terminate)
+{
+ int terminator_length;
+ if (encoding == ID3V2_ENCODING_UTF16_BOM) {
+ gsize utf16len;
+ /* This converts to little-endian UTF-16, WITH a byte-order-marker */
+ gchar *utf16 = g_convert (string, -1, "UTF-16LE", "UTF-8",
+ NULL, &utf16len, NULL);
+ if (!utf16) {
+ GST_WARNING ("Failed to convert UTF-8 to UTF-16LE");
+ return;
+ }
+
+ /* NUL terminator is 2 bytes, if present */
+ terminator_length = null_terminate ? 2 : 0;
+ id3v2_frame_write_bytes (frame, (const guint8 *) utf16,
+ utf16len + terminator_length);
+
+ g_free (utf16);
+ } else {
+ /* write NUL terminator as well if requested */
+ terminator_length = null_terminate ? 1 : 0;
+ id3v2_frame_write_bytes (frame, (const guint8 *) string,
+ strlen (string) + terminator_length);
+ }
+}
+
static void
id3v2_tag_add_text_frame (GstId3v2Tag * tag, const gchar * frame_id,
gchar ** strings_utf8, int num_strings)
{
GstId3v2Frame frame;
guint len, i;
+ int encoding;
if (num_strings < 1 || strings_utf8 == NULL || strings_utf8[0] == NULL) {
GST_LOG ("Not adding text frame, no strings");
@@ -333,7 +399,9 @@ id3v2_tag_add_text_frame (GstId3v2Tag * tag, const gchar * frame_id,
}
id3v2_frame_init (&frame, frame_id, 0);
- id3v2_frame_write_uint8 (&frame, ID3V2_ENCODING_UTF8);
+
+ encoding = id3v2_tag_string_encoding (tag, strings_utf8[0]);
+ id3v2_frame_write_uint8 (&frame, encoding);
GST_LOG ("Adding text frame %s with %d strings", frame_id, num_strings);
@@ -341,8 +409,8 @@ id3v2_tag_add_text_frame (GstId3v2Tag * tag, const gchar * frame_id,
len = strlen (strings_utf8[i]);
g_return_if_fail (g_utf8_validate (strings_utf8[i], len, NULL));
- /* write NUL terminator as well */
- id3v2_frame_write_bytes (&frame, (const guint8 *) strings_utf8[i], len + 1);
+ id3v2_frame_write_string (&frame, encoding, strings_utf8[i],
+ i != num_strings - 1);
/* only v2.4.0 supports multiple strings per frame (according to the
* earlier specs tag readers should just ignore everything after the first
@@ -530,11 +598,10 @@ add_comment_tag (GstId3v2Tag * id3v2tag, const GstTagList * list,
if (gst_tag_list_get_string_index (list, tag, n, &s) && s != NULL) {
gchar *desc = NULL, *val = NULL, *lang = NULL;
- int desclen, vallen;
+ int desclen, vallen, encoding1, encoding2, encoding;
GstId3v2Frame frame;
id3v2_frame_init (&frame, "COMM", 0);
- id3v2_frame_write_uint8 (&frame, ID3V2_ENCODING_UTF8);
if (strcmp (tag, GST_TAG_COMMENT) == 0 ||
!gst_tag_parse_extended_comment (s, &desc, &lang, &val, TRUE)) {
@@ -556,10 +623,15 @@ add_comment_tag (GstId3v2Tag * id3v2tag, const GstTagList * list,
GST_LOG ("%s[%u] = '%s' (%s|%s|%s)", tag, n, s, GST_STR_NULL (desc),
GST_STR_NULL (lang), GST_STR_NULL (val));
+ encoding1 = id3v2_tag_string_encoding (id3v2tag, desc);
+ encoding2 = id3v2_tag_string_encoding (id3v2tag, val);
+ encoding = MAX (encoding1, encoding2);
+
+ id3v2_frame_write_uint8 (&frame, encoding);
id3v2_frame_write_bytes (&frame, (const guint8 *) lang, 3);
- /* write description and value, each including NULL terminator */
- id3v2_frame_write_bytes (&frame, (const guint8 *) desc, desclen + 1);
- id3v2_frame_write_bytes (&frame, (const guint8 *) val, vallen + 1);
+ /* write description and value */
+ id3v2_frame_write_string (&frame, encoding, desc, TRUE);
+ id3v2_frame_write_string (&frame, encoding, val, FALSE);
g_free (lang);
g_free (desc);
@@ -597,6 +669,7 @@ add_image_tag (GstId3v2Tag * id3v2tag, const GstTagList * list,
if (mime_type != NULL) {
const gchar *desc;
GstId3v2Frame frame;
+ int encoding;
/* APIC frame specifies "-->" if we're providing a URL to the image
rather than directly embedding it */
@@ -607,9 +680,14 @@ add_image_tag (GstId3v2Tag * id3v2tag, const GstTagList * list,
GST_BUFFER_SIZE (image), mime_type);
id3v2_frame_init (&frame, "APIC", 0);
- id3v2_frame_write_uint8 (&frame, ID3V2_ENCODING_UTF8);
- id3v2_frame_write_bytes (&frame, (const guint8 *) mime_type,
- strlen (mime_type) + 1);
+
+ desc = gst_structure_get_string (s, "image-description");
+ if (!desc)
+ desc = "";
+ encoding = id3v2_tag_string_encoding (id3v2tag, desc);
+ id3v2_frame_write_uint8 (&frame, encoding);
+
+ id3v2_frame_write_string (&frame, encoding, mime_type, TRUE);
/* FIXME set image type properly from caps */
if (strcmp (tag, GST_TAG_PREVIEW_IMAGE) == 0)
@@ -617,11 +695,7 @@ add_image_tag (GstId3v2Tag * id3v2tag, const GstTagList * list,
else
id3v2_frame_write_uint8 (&frame, ID3V2_APIC_PICTURE_OTHER);
- desc = gst_structure_get_string (s, "image-description");
- if (!desc)
- desc = "";
- id3v2_frame_write_bytes (&frame, (const guint8 *) desc,
- strlen (desc) + 1);
+ id3v2_frame_write_string (&frame, encoding, desc, TRUE);
g_array_append_val (id3v2tag->frames, frame);
}
@@ -669,24 +743,22 @@ add_musicbrainz_tag (GstId3v2Tag * id3v2tag, const GstTagList * list,
/* add two frames, one with the ID the musicbrainz.org spec mentions
* and one with the ID that applications use in the real world */
GstId3v2Frame frame1, frame2;
+ int encoding;
GST_DEBUG ("Setting '%s' to '%s'", mb_ids[idx].spec_id, id_str);
+ encoding = id3v2_tag_string_encoding (id3v2tag, id_str);
id3v2_frame_init (&frame1, "TXXX", 0);
- id3v2_frame_write_uint8 (&frame1, ID3V2_ENCODING_UTF8);
- id3v2_frame_write_bytes (&frame1, (const guint8 *) mb_ids[idx].spec_id,
- strlen (mb_ids[idx].spec_id) + 1);
- id3v2_frame_write_bytes (&frame1, (const guint8 *) id_str,
- strlen (id_str) + 1);
+ id3v2_frame_write_uint8 (&frame1, encoding);
+ id3v2_frame_write_string (&frame1, encoding, mb_ids[idx].spec_id, TRUE);
+ id3v2_frame_write_string (&frame1, encoding, id_str, FALSE);
g_array_append_val (id3v2tag->frames, frame1);
id3v2_frame_init (&frame2, "TXXX", 0);
- id3v2_frame_write_uint8 (&frame2, ID3V2_ENCODING_UTF8);
- id3v2_frame_write_bytes (&frame2,
- (const guint8 *) mb_ids[idx].realworld_id,
- strlen (mb_ids[idx].realworld_id) + 1);
- id3v2_frame_write_bytes (&frame2, (const guint8 *) id_str,
- strlen (id_str) + 1);
+ id3v2_frame_write_uint8 (&frame2, encoding);
+ id3v2_frame_write_string (&frame2, encoding,
+ mb_ids[idx].realworld_id, TRUE);
+ id3v2_frame_write_string (&frame2, encoding, id_str, FALSE);
g_array_append_val (id3v2tag->frames, frame2);
g_free (id_str);