diff options
author | Carlos Rafael Giani <crg7475@mailbox.org> | 2023-03-03 12:10:38 +0100 |
---|---|---|
committer | GStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org> | 2023-05-03 08:47:55 +0000 |
commit | 0071c97128f5e7a615807ef1d9b4b5c418589310 (patch) | |
tree | 95caa5f09bbc644eb57eafb018cd9d7a01a687f2 /subprojects | |
parent | 51ebda4df562c134210f420b0415862ea280d3e9 (diff) | |
download | gstreamer-0071c97128f5e7a615807ef1d9b4b5c418589310.tar.gz |
qtdemux: Add audio clipping meta when playing gapless m4a content
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4200>
Diffstat (limited to 'subprojects')
-rw-r--r-- | subprojects/gst-plugins-good/gst/isomp4/qtdemux.c | 224 | ||||
-rw-r--r-- | subprojects/gst-plugins-good/gst/isomp4/qtdemux.h | 33 | ||||
-rw-r--r-- | subprojects/gst-plugins-good/gst/isomp4/qtdemux_tags.c | 103 | ||||
-rw-r--r-- | subprojects/gst-plugins-good/tests/check/elements/qtdemux.c | 418 | ||||
-rw-r--r-- | subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-itunes.m4a | bin | 0 -> 38639 bytes | |||
-rw-r--r-- | subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-with-itunsmpb.m4a | bin | 0 -> 19032 bytes | |||
-rw-r--r-- | subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-without-itunsmpb.m4a | bin | 0 -> 18530 bytes |
7 files changed, 775 insertions, 3 deletions
diff --git a/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c b/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c index 3d668a9c8e..a872a69090 100644 --- a/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c +++ b/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c @@ -386,6 +386,8 @@ static gboolean gst_qtdemux_stream_update_segment (GstQTDemux * qtdemux, static void gst_qtdemux_send_gap_for_segment (GstQTDemux * demux, QtDemuxStream * stream, gint segment_index, GstClockTime pos); +static void qtdemux_check_if_is_gapless_audio (GstQTDemux * qtdemux); + static gboolean qtdemux_pull_mfro_mfra (GstQTDemux * qtdemux); static void check_update_duration (GstQTDemux * qtdemux, GstClockTime duration); @@ -659,7 +661,12 @@ gst_qtdemux_get_duration (GstQTDemux * qtdemux, GstClockTime * duration) if (qtdemux->duration != 0 && qtdemux->duration != G_MAXINT64 && qtdemux->timescale != 0) { - *duration = QTTIME_TO_GSTTIME (qtdemux, qtdemux->duration); + /* If this is single-stream audio media with gapless data, + * report the duration of the valid subset of the overall data. */ + if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) + *duration = qtdemux->gapless_audio_info.valid_duration; + else + *duration = QTTIME_TO_GSTTIME (qtdemux, qtdemux->duration); res = TRUE; } else { *duration = GST_CLOCK_TIME_NONE; @@ -2048,6 +2055,11 @@ gst_qtdemux_reset (GstQTDemux * qtdemux, gboolean hard) qtdemux->have_group_id = FALSE; qtdemux->group_id = G_MAXUINT; + qtdemux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_NONE; + qtdemux->gapless_audio_info.num_start_padding_pcm_frames = 0; + qtdemux->gapless_audio_info.num_end_padding_pcm_frames = 0; + qtdemux->gapless_audio_info.num_valid_pcm_frames = 0; + g_queue_clear_full (&qtdemux->protection_event_queue, (GDestroyNotify) gst_event_unref); @@ -5507,6 +5519,14 @@ gst_qtdemux_stream_update_segment (GstQTDemux * qtdemux, QtDemuxStream * stream, stream->segment.time = time; stream->segment.position = stream->segment.start; + /* Gapless audio requires adjustments to the segment + * to reflect the actual playtime length. In + * particular, this must exclude padding data. */ + if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) { + stream->segment.stop = stream->segment.start + + qtdemux->gapless_audio_info.valid_duration; + } + GST_DEBUG_OBJECT (stream->pad, "New segment: %" GST_SEGMENT_FORMAT, &stream->segment); @@ -6414,6 +6434,83 @@ gst_qtdemux_push_buffer (GstQTDemux * qtdemux, QtDemuxStream * stream, GST_ERROR_OBJECT (qtdemux, "failed to attach aavd metadata to buffer"); } + if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) { + guint64 num_start_padding_pcm_frames; + guint64 audio_sample_offset; + guint64 audio_sample_offset_end; + guint64 start_of_trailing_padding; + guint64 start_clip = 0, end_clip = 0; + guint64 total_num_clipped_samples; + GstClockTime timestamp_decrement; + + /* Attach GstAudioClippingMeta to exclude padding data. */ + + num_start_padding_pcm_frames = + qtdemux->gapless_audio_info.num_start_padding_pcm_frames; + + audio_sample_offset = stream->sample_index * stream->stts_duration; + audio_sample_offset_end = audio_sample_offset + stream->stts_duration; + start_of_trailing_padding = num_start_padding_pcm_frames + + qtdemux->gapless_audio_info.num_valid_pcm_frames; + + if (audio_sample_offset < num_start_padding_pcm_frames) { + guint64 num_padding_audio_samples = + num_start_padding_pcm_frames - audio_sample_offset; + start_clip = MIN (num_padding_audio_samples, stream->stts_duration); + } + + timestamp_decrement = qtdemux->gapless_audio_info.start_padding_duration; + + if (audio_sample_offset >= start_of_trailing_padding) { + /* This case happens when the buffer is located fully past + * the beginning of the padding area at the end of the stream. + * Add the end padding to the decrement amount to ensure + * continuous timestamps when transitioning from gapless + * media to gapless media. */ + end_clip = stream->stts_duration; + timestamp_decrement += qtdemux->gapless_audio_info.end_padding_duration; + } else if (audio_sample_offset_end >= start_of_trailing_padding) { + /* This case happens when the beginning of the padding area that + * is located at the end of the stream intersects the buffer. */ + end_clip = audio_sample_offset_end - start_of_trailing_padding; + } + + total_num_clipped_samples = start_clip + end_clip; + + if (total_num_clipped_samples != 0) { + GST_DEBUG_OBJECT (qtdemux, "adding audio clipping meta: start / " + "end clip: %" G_GUINT64_FORMAT " / %" G_GUINT64_FORMAT, + start_clip, end_clip); + gst_buffer_add_audio_clipping_meta (buf, GST_FORMAT_DEFAULT, + start_clip, end_clip); + + if (total_num_clipped_samples >= stream->stts_duration) { + GST_BUFFER_DURATION (buf) = 0; + GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DECODE_ONLY); + GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DROPPABLE); + } else { + guint64 num_valid_samples = + stream->stts_duration - total_num_clipped_samples; + GST_BUFFER_DURATION (buf) = + QTSTREAMTIME_TO_GSTTIME (stream, num_valid_samples); + } + } + + /* The timestamps need to be shifted to factor in the skipped padding data. */ + + if (GST_BUFFER_PTS_IS_VALID (buf)) { + GstClockTime ts = GST_BUFFER_PTS (buf); + GST_BUFFER_PTS (buf) = + (ts >= timestamp_decrement) ? (ts - timestamp_decrement) : 0; + } + + if (GST_BUFFER_DTS_IS_VALID (buf)) { + GstClockTime ts = GST_BUFFER_DTS (buf); + GST_BUFFER_DTS (buf) = + (ts >= timestamp_decrement) ? (ts - timestamp_decrement) : 0; + } + } + if (stream->protected && (stream->protection_scheme_type == FOURCC_cenc || stream->protection_scheme_type == FOURCC_cbcs)) { GstStructure *crypto_info; @@ -7565,6 +7662,129 @@ gst_qtdemux_send_gap_for_segment (GstQTDemux * demux, } } +static void +qtdemux_check_if_is_gapless_audio (GstQTDemux * qtdemux) +{ + QtDemuxStream *stream; + + if (QTDEMUX_N_STREAMS (qtdemux) != 1) + goto incompatible_stream; + + stream = QTDEMUX_NTH_STREAM (qtdemux, 0); + + if (stream->subtype != FOURCC_soun || stream->n_segments != 1) + goto incompatible_stream; + + /* Gapless audio info from revdns tags (most notably iTunSMPB) is + * detected in the main udta node. If it isn't present, try as + * fallback to recognize the encoder name, and apply known priming + * and padding quantities specific to the encoder. */ + if (qtdemux->gapless_audio_info.type == GAPLESS_AUDIO_INFO_TYPE_NONE) { + const gchar *orig_encoder_name = NULL; + + if (gst_tag_list_peek_string_index (qtdemux->tag_list, GST_TAG_ENCODER, 0, + &orig_encoder_name) && orig_encoder_name != NULL) { + gchar *lowercase_encoder_name = g_ascii_strdown (orig_encoder_name, -1); + + if (strstr (lowercase_encoder_name, "nero") != NULL) + qtdemux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_NERO; + + g_free (lowercase_encoder_name); + + switch (qtdemux->gapless_audio_info.type) { + case GAPLESS_AUDIO_INFO_TYPE_NERO:{ + guint64 total_length; + guint64 valid_length; + guint64 start_padding; + + /* The Nero AAC encoder always uses a lead-in of 1600 PCM frames. + * Also, in Nero AAC's case, stream->duration contains the number + * of PCM frames with start padding but without end padding. + * The decoder delay equals 1 frame length, which is covered by + * factoring stream->stts_duration into the start padding. */ + start_padding = 1600 + stream->stts_duration; + + if (G_UNLIKELY (stream->duration < start_padding)) { + GST_ERROR_OBJECT (qtdemux, "stream duration is %" G_GUINT64_FORMAT + " but start_padding is %" G_GUINT64_FORMAT, stream->duration, + start_padding); + goto invalid_gapless_audio_info; + } + valid_length = stream->duration - start_padding; + + qtdemux->gapless_audio_info.num_start_padding_pcm_frames = + start_padding; + qtdemux->gapless_audio_info.num_valid_pcm_frames = valid_length; + + total_length = stream->n_samples * stream->stts_duration; + + if (G_LIKELY (total_length >= valid_length)) { + guint64 total_padding = total_length - valid_length; + if (G_UNLIKELY (total_padding < start_padding)) { + GST_ERROR_OBJECT (qtdemux, "total_padding is %" G_GUINT64_FORMAT + " but start_padding is %" G_GUINT64_FORMAT, total_padding, + start_padding); + goto invalid_gapless_audio_info; + } + + qtdemux->gapless_audio_info.num_end_padding_pcm_frames = + total_padding - start_padding; + } else { + qtdemux->gapless_audio_info.num_end_padding_pcm_frames = 0; + } + + GST_DEBUG_OBJECT (qtdemux, "media was encoded with Nero AAC encoder; " + "using encoder specific lead-in and padding figures"); + } + + default: + break; + } + } + } + + if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) { + qtdemux->gapless_audio_info.start_padding_duration = + QTSTREAMTIME_TO_GSTTIME (stream, + qtdemux->gapless_audio_info.num_start_padding_pcm_frames); + qtdemux->gapless_audio_info.end_padding_duration = + QTSTREAMTIME_TO_GSTTIME (stream, + qtdemux->gapless_audio_info.num_end_padding_pcm_frames); + qtdemux->gapless_audio_info.valid_duration = + QTSTREAMTIME_TO_GSTTIME (stream, + qtdemux->gapless_audio_info.num_valid_pcm_frames); + } + + GST_DEBUG_OBJECT (qtdemux, "found valid gapless audio info: num start / end " + "PCM padding frames: %" G_GUINT64_FORMAT " / %" G_GUINT64_FORMAT "; " + "start / end padding durations: %" GST_TIME_FORMAT " / %" GST_TIME_FORMAT + "; num valid PCM frames: %" G_GUINT64_FORMAT "; valid duration: %" + GST_TIME_FORMAT, qtdemux->gapless_audio_info.num_start_padding_pcm_frames, + qtdemux->gapless_audio_info.num_end_padding_pcm_frames, + GST_TIME_ARGS (qtdemux->gapless_audio_info.start_padding_duration), + GST_TIME_ARGS (qtdemux->gapless_audio_info.end_padding_duration), + qtdemux->gapless_audio_info.num_valid_pcm_frames, + GST_TIME_ARGS (qtdemux->gapless_audio_info.valid_duration)); + + return; + +incompatible_stream: + if (G_UNLIKELY (qtdemux->gapless_audio_info.type != + GAPLESS_AUDIO_INFO_TYPE_NONE)) { + GST_WARNING_OBJECT (qtdemux, + "media contains gapless audio info, but it is not suitable for " + "gapless audio playback (media must be audio-only, single-stream, " + "single-segment; ignoring unusable gapless info"); + qtdemux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_NONE; + } + return; + +invalid_gapless_audio_info: + GST_WARNING_OBJECT (qtdemux, + "media contains invalid/unusable gapless audio info"); + return; +} + static GstFlowReturn gst_qtdemux_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * inbuf) { @@ -14009,6 +14229,8 @@ qtdemux_prepare_streams (GstQTDemux * qtdemux) } } + qtdemux_check_if_is_gapless_audio (qtdemux); + return ret; } diff --git a/subprojects/gst-plugins-good/gst/isomp4/qtdemux.h b/subprojects/gst-plugins-good/gst/isomp4/qtdemux.h index 830ed2fda5..6e7f64b91c 100644 --- a/subprojects/gst-plugins-good/gst/isomp4/qtdemux.h +++ b/subprojects/gst-plugins-good/gst/isomp4/qtdemux.h @@ -54,6 +54,7 @@ typedef struct _QtDemuxSample QtDemuxSample; typedef struct _QtDemuxSegment QtDemuxSegment; typedef struct _QtDemuxRandomAccessEntry QtDemuxRandomAccessEntry; typedef struct _QtDemuxStreamStsdEntry QtDemuxStreamStsdEntry; +typedef struct _QtDemuxGaplessAudioInfo QtDemuxGaplessAudioInfo; typedef GstBuffer * (*QtDemuxProcessFunc)(GstQTDemux * qtdemux, QtDemuxStream * stream, GstBuffer * buf); @@ -78,6 +79,36 @@ typedef enum { VARIANT_MSS_FRAGMENTED, } Variant; +typedef enum { + /* No valid gapless audio info present. Types other than this one + * are used only if all of these apply: + * + * 1. There is embedded gapless audio information available + * 2. Only one stream exists + * 3. Said stream has only one segment + * 4. Said stream is an audio stream + */ + GAPLESS_AUDIO_INFO_TYPE_NONE, + /* Using information from the iTunes iTunSMPB revdns tag. */ + GAPLESS_AUDIO_INFO_TYPE_ITUNES, + /* Using known Nero encoder delay information. */ + GAPLESS_AUDIO_INFO_TYPE_NERO +} QtDemuxGaplessAudioInfoType; + +/* Gapless audio information, only used for single-stream audio-only media. */ +struct _QtDemuxGaplessAudioInfo { + QtDemuxGaplessAudioInfoType type; + + guint64 num_start_padding_pcm_frames; + guint64 num_end_padding_pcm_frames; + guint64 num_valid_pcm_frames; + + /* PCM frame amounts converted to nanoseconds. */ + GstClockTime start_padding_duration; + GstClockTime end_padding_duration; + GstClockTime valid_duration; +}; + struct _GstQTDemux { GstElement element; @@ -177,6 +208,8 @@ struct _GstQTDemux { gint64 chapters_track_id; + QtDemuxGaplessAudioInfo gapless_audio_info; + /* protection support */ GPtrArray *protection_system_ids; /* Holds identifiers of all content protection systems for all tracks */ GQueue protection_event_queue; /* holds copy of upstream protection events */ diff --git a/subprojects/gst-plugins-good/gst/isomp4/qtdemux_tags.c b/subprojects/gst-plugins-good/gst/isomp4/qtdemux_tags.c index 0531dcba59..0ec9cb99cd 100644 --- a/subprojects/gst-plugins-good/gst/isomp4/qtdemux_tags.c +++ b/subprojects/gst-plugins-good/gst/isomp4/qtdemux_tags.c @@ -747,12 +747,111 @@ qtdemux_tag_add_revdns (GstQTDemux * demux, GstTagList * taglist, break; } } - if (i == G_N_ELEMENTS (tags)) - goto unknown_tag; + + /* Some tags might not actually be used for metadata about the media, + * but for other purposes. One such tag is iTunSMPB, which contains + * padding information for gapless playback. Scan these separately. */ + if (i == G_N_ELEMENTS (tags)) { + if (!g_ascii_strncasecmp ("iTunSMPB", namestr, 8)) { + /* iTunSMPB tag format goes as follows: + * + * " 00000000 xxxxxxxx yyyyyyyy zzzzzzzzzzzzzzzz 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000" + * + * The data is actually an ASCII string containing these hex fields. + * The description above is _not_ a description of a binary format! + * These need to be parsed with g_ascii_strtoull() and base 16. + * + * (The quotes are not part of it; they just emphasize the + * whitespace at the beginning of the string). + * + * Only the fields marked with x/y/z are of interest here. + * + * The x field is the priming, in samples. + * These are the padding samples at the beginning of the stream. + * + * The y field is the remainder, in samples. + * These are the padding samples at the end of the stream. + * + * The z field is the number of valid PCM frames, excluding the + * priming and remainder. (In other words, the number of PCM + * frames that make up the actual audio, without the padding.) + * + * The data starts at offset 16. All access to it must therefore skip + * the first 16 bytes. + */ + + const gsize start_offset = 16; + const gsize priming_offset = start_offset + 10; + const gsize remainder_offset = start_offset + 19; + const gsize num_valid_pcm_frames_offset = start_offset + 28; + const gsize total_length = 44; + const gchar *str; + guint64 priming; + guint64 remainder; + guint64 num_valid_pcm_frames; + /* Temporary buffer for g_ascii_strtoull() calls. + * Add extra +1 space for nullbyte. */ + gchar tmp[16 + 1]; + + /* Use the iTunSMPB info if no other info has been found yet. */ + if (demux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) { + GST_DEBUG_OBJECT (demux, "iTunSMPB information found, " + "but other gapless audio info was already read"); + goto finish; + } + + if (G_UNLIKELY (datasize < (start_offset + total_length))) { + GST_WARNING_OBJECT (demux, + "iTunSMPB tag data size too small - not parsing"); + goto finish; + } + + str = (gchar *) ((guint8 *) data->data); + +#define PARSE_ITUNSMPB_FIELD(FIELD_NAME, NUM_DIGITS) \ + G_STMT_START \ + { \ + gint str_idx; \ +\ + for (str_idx = 0; str_idx < (NUM_DIGITS); ++str_idx) { \ + gchar ch = str[FIELD_NAME ## _offset + str_idx]; \ + if (!g_ascii_isxdigit (ch)) { \ + GST_WARNING_OBJECT (demux, #FIELD_NAME " field in iTunSMPB " \ + "tag data has invalid character '%c'", ch); \ + goto finish; \ + } \ + tmp[str_idx] = ch; \ + } \ + tmp[NUM_DIGITS] = 0; \ +\ + FIELD_NAME = g_ascii_strtoull (tmp, NULL, 16); \ + } \ + G_STMT_END + + PARSE_ITUNSMPB_FIELD (priming, 8); + PARSE_ITUNSMPB_FIELD (remainder, 8); + PARSE_ITUNSMPB_FIELD (num_valid_pcm_frames, 16); + +#undef PARSE_ITUNSMPB_FIELD + + GST_DEBUG_OBJECT (demux, "iTunSMPB information: priming %" + G_GUINT64_FORMAT " remainder %" G_GUINT64_FORMAT + " num valid PCM frames %" G_GUINT64_FORMAT, priming, remainder, + num_valid_pcm_frames); + + demux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_ITUNES; + demux->gapless_audio_info.num_start_padding_pcm_frames = priming; + demux->gapless_audio_info.num_end_padding_pcm_frames = remainder; + demux->gapless_audio_info.num_valid_pcm_frames = num_valid_pcm_frames; + } else { + goto unknown_tag; + } + } } else { goto unknown_tag; } +finish: return; /* errors */ diff --git a/subprojects/gst-plugins-good/tests/check/elements/qtdemux.c b/subprojects/gst-plugins-good/tests/check/elements/qtdemux.c index 04092afff1..c98a0d655e 100644 --- a/subprojects/gst-plugins-good/tests/check/elements/qtdemux.c +++ b/subprojects/gst-plugins-good/tests/check/elements/qtdemux.c @@ -27,6 +27,8 @@ #include <glib/gprintf.h> #include <gst/check/check.h> +#include <gst/app/gstappsink.h> +#include <gst/audio/audio.h> #define TEST_FILE_PREFIX GST_TEST_FILES_PATH G_DIR_SEPARATOR_S @@ -1200,6 +1202,419 @@ GST_START_TEST (test_qtdemux_mss_fragment) GST_END_TEST; +typedef struct +{ + const gchar *filename; + /* Total number of AAC frames, including any and all dummy/empty/padding frames. */ + guint num_aac_frames; + /* In AAC, this is 1024 in the vast majority of the cases. + * AAC can also use 960 samples per frame, but this is rare. */ + guint num_samples_per_frame; + /* How many padding samples to expect at the beginning and the end. + * The amount of padding samples can exceed the size of a frame. + * This means that the first and last N frame(s) can actually be + * fully made of padding samples and thus need to be thrown away. */ + guint num_start_padding_samples; + guint num_end_padding_samples; + guint sample_rate; + /* Some encoders produce data whose last frame uses a different + * (smaller) stts value to handle the padding at the end. Data + * produced by such encoders will not get a clipmeta added at the + * end. When using test data produced by such an encoder, this + * must be set to FALSE, otherwise it must be set to TRUE. + * Notably, anything that produces an iTunSMPB tag (iTunes itself + * as well as newer Nero encoders for example) will cause such + * a clipmeta to be added. */ + gboolean expect_clipmeta_at_end; + + /* Total number of samples available, with / without padding + * samples factored in. */ + guint64 num_samples_with_padding; + guint64 num_samples_without_padding; + + /* The index of the first / last frame that contains valid samples. + * Indices start with 0. Valid range is [0 , (num_aac_frames-1)]. + * In virtually all cases, when the AAC data was encoded with iTunes, + * the first and last valid frames will be partially clipped. */ + guint first_frame_with_valid_samples; + guint last_frame_with_valid_samples; + + guint64 num_samples_in_first_valid_frame; + guint64 num_samples_in_last_valid_frame; + + GstClockTime total_duration_without_padding; + + GstElement *appsink; +} GaplessTestInfo; + +static void +precalculate_gapless_test_factors (GaplessTestInfo * info) +{ + info->num_samples_with_padding = info->num_aac_frames * + info->num_samples_per_frame; + info->num_samples_without_padding = info->num_samples_with_padding - + info->num_start_padding_samples - info->num_end_padding_samples; + + info->first_frame_with_valid_samples = info->num_start_padding_samples / + info->num_samples_per_frame; + info->last_frame_with_valid_samples = (info->num_samples_with_padding - + info->num_end_padding_samples) / info->num_samples_per_frame; + + info->num_samples_in_first_valid_frame = + (info->first_frame_with_valid_samples + 1) * info->num_samples_per_frame - + info->num_start_padding_samples; + info->num_samples_in_last_valid_frame = + (info->num_samples_with_padding - info->num_end_padding_samples) - + info->last_frame_with_valid_samples * info->num_samples_per_frame; + + /* The total actual playtime duration. */ + info->total_duration_without_padding = + gst_util_uint64_scale_int (info->num_samples_without_padding, GST_SECOND, + info->sample_rate); + + GST_DEBUG ("num_samples_with_padding %" G_GUINT64_FORMAT + " num_samples_without_padding %" G_GUINT64_FORMAT + " first_frame_with_valid_samples %u" + " last_frame_with_valid_samples %u" + " num_samples_in_first_valid_frame %" G_GUINT64_FORMAT + " num_samples_in_last_valid_frame %" G_GUINT64_FORMAT + " total_duration_without_padding %" G_GUINT64_FORMAT, + info->num_samples_with_padding, info->num_samples_without_padding, + info->first_frame_with_valid_samples, info->last_frame_with_valid_samples, + info->num_samples_in_first_valid_frame, + info->num_samples_in_last_valid_frame, + info->total_duration_without_padding); +} + +static void +setup_gapless_itunes_test_info (GaplessTestInfo * info) +{ + info->filename = + "sine-1kHztone-48kHzrate-mono-s32le-200000samples-itunes.m4a"; + info->num_aac_frames = 198; + info->num_samples_per_frame = 1024; + info->sample_rate = 48000; + info->expect_clipmeta_at_end = TRUE; + + info->num_start_padding_samples = 2112; + info->num_end_padding_samples = 640; + + precalculate_gapless_test_factors (info); +} + +static void +setup_gapless_nero_with_itunsmpb_test_info (GaplessTestInfo * info) +{ + info->filename = + "sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-with-itunsmpb.m4a"; + info->num_aac_frames = 198; + info->num_samples_per_frame = 1024; + info->sample_rate = 48000; + info->expect_clipmeta_at_end = TRUE; + + info->num_start_padding_samples = 2624; + info->num_end_padding_samples = 128; + + precalculate_gapless_test_factors (info); +} + +static void +setup_gapless_nero_without_itunsmpb_test_info (GaplessTestInfo * info) +{ + info->filename = + "sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-without-itunsmpb.m4a"; + info->num_aac_frames = 198; + info->num_samples_per_frame = 1024; + info->sample_rate = 48000; + /* Older Nero AAC encoders produce a different stts value for the + * last frame to skip padding data. In this file, all frames except + * the last one use an stts value of 1024, while the last value + * uses an stts value of 896. Consequently, the logic inside qtdemux + * won't deem it necessary to add an audioclipmeta - there are no + * padding samples to clip. */ + info->expect_clipmeta_at_end = FALSE; + + info->num_start_padding_samples = 2624; + info->num_end_padding_samples = 128; + + precalculate_gapless_test_factors (info); +} + +static void +check_parsed_aac_frame (GaplessTestInfo * info, guint frame_num) +{ + GstClockTime expected_pts = GST_CLOCK_TIME_NONE; + GstClockTime expected_duration = GST_CLOCK_TIME_NONE; + GstClockTimeDiff ts_delta; + guint64 expected_sample_offset; + guint64 expected_num_samples; + gboolean expect_audioclipmeta = FALSE; + guint64 expected_audioclipmeta_start = 0; + guint64 expected_audioclipmeta_end = 0; + GstSample *sample; + GstBuffer *buffer; + GstAudioClippingMeta *audioclip_meta; + + if (frame_num < info->first_frame_with_valid_samples) { + /* Frame is at the beginning and is fully clipped. */ + expected_sample_offset = 0; + expected_num_samples = 0; + + expected_audioclipmeta_start = info->num_samples_per_frame; + expected_audioclipmeta_end = 0; + } else if (frame_num == info->first_frame_with_valid_samples) { + /* Frame is at the beginning and is partially clipped. */ + + expected_sample_offset = 0; + expected_num_samples = info->num_samples_in_first_valid_frame; + + expected_audioclipmeta_start = info->num_samples_per_frame - + info->num_samples_in_first_valid_frame; + expected_audioclipmeta_end = 0; + } else if (frame_num < info->last_frame_with_valid_samples) { + /* Regular, unclipped frame. */ + + expected_sample_offset = info->num_samples_in_first_valid_frame + + info->num_samples_per_frame * (frame_num - + info->first_frame_with_valid_samples - 1); + expected_num_samples = info->num_samples_per_frame; + } else if (frame_num == info->last_frame_with_valid_samples) { + /* The first frame at the end with padding samples. This one will have + * the last few valid samples, followed by the first padding samples. */ + + expected_sample_offset = info->num_samples_in_first_valid_frame + + info->num_samples_per_frame * (frame_num - + info->first_frame_with_valid_samples - 1); + expected_num_samples = info->num_samples_in_last_valid_frame; + + if (info->expect_clipmeta_at_end) { + expect_audioclipmeta = TRUE; + expected_audioclipmeta_start = 0; + expected_audioclipmeta_end = + info->num_samples_per_frame - expected_num_samples; + } + } else { + /* A fully clipped frame at the end of the stream. */ + + expected_sample_offset = info->num_samples_in_first_valid_frame + + info->num_samples_without_padding; + expected_num_samples = 0; + + if (info->expect_clipmeta_at_end) { + expect_audioclipmeta = TRUE; + expected_audioclipmeta_start = 0; + expected_audioclipmeta_end = info->num_samples_per_frame; + } + } + + /* Pull the frame from appsink so we can check it. */ + + sample = gst_app_sink_pull_sample (GST_APP_SINK (info->appsink)); + fail_if (sample == NULL); + fail_unless (GST_IS_SAMPLE (sample)); + + expected_pts = gst_util_uint64_scale_int (expected_sample_offset, + GST_SECOND, info->sample_rate); + expected_duration = gst_util_uint64_scale_int (expected_num_samples, + GST_SECOND, info->sample_rate); + + buffer = gst_sample_get_buffer (sample); + fail_if (buffer == NULL); + + /* Verify the sample's PTS and duration. Allow for 1 nanosecond difference + * to account for rounding errors in sample <-> timestamp conversions. */ + ts_delta = GST_CLOCK_DIFF (GST_BUFFER_PTS (buffer), expected_pts); + fail_unless (ABS (ts_delta) <= 1); + ts_delta = GST_CLOCK_DIFF (GST_BUFFER_DURATION (buffer), expected_duration); + fail_unless (ABS (ts_delta) <= 1); + /* Check if there's audio clip metadata, and verify it if it exists. */ + if (expect_audioclipmeta) { + audioclip_meta = gst_buffer_get_audio_clipping_meta (buffer); + fail_if (audioclip_meta == NULL); + fail_unless_equals_uint64 (audioclip_meta->start, + expected_audioclipmeta_start); + fail_unless_equals_uint64 (audioclip_meta->end, expected_audioclipmeta_end); + } + + gst_sample_unref (sample); +} + +static void +qtdemux_pad_added_cb_for_gapless (GstElement * demux, GstPad * pad, + GaplessTestInfo * info) +{ + GstPad *appsink_pad; + GstPadLinkReturn ret; + + appsink_pad = gst_element_get_static_pad (info->appsink, "sink"); + + if (gst_pad_is_linked (appsink_pad)) + goto finish; + + ret = gst_pad_link (pad, appsink_pad); + if (GST_PAD_LINK_FAILED (ret)) { + GST_ERROR ("Could not link qtdemux and appsink: %s", + gst_pad_link_get_name (ret)); + } + +finish: + gst_object_unref (GST_OBJECT (appsink_pad)); +} + +static void +perform_gapless_test (GaplessTestInfo * info) +{ + GstElement *source, *demux, *appsink, *pipeline; + GstStateChangeReturn state_ret; + guint frame_num; + + pipeline = gst_pipeline_new (NULL); + source = gst_element_factory_make ("filesrc", NULL); + demux = gst_element_factory_make ("qtdemux", NULL); + appsink = gst_element_factory_make ("appsink", NULL); + + info->appsink = appsink; + + g_signal_connect (demux, "pad-added", (GCallback) + qtdemux_pad_added_cb_for_gapless, info); + + gst_bin_add_many (GST_BIN (pipeline), source, demux, appsink, NULL); + gst_element_link (source, demux); + + { + char *full_filename = + g_build_filename (GST_TEST_FILES_PATH, info->filename, NULL); + g_object_set (G_OBJECT (source), "location", full_filename, NULL); + g_free (full_filename); + } + + g_object_set (G_OBJECT (appsink), "async", FALSE, "sync", FALSE, + "max-buffers", 1, "enable-last-sample", FALSE, "processing-deadline", + G_MAXUINT64, NULL); + + state_ret = gst_element_set_state (pipeline, GST_STATE_PLAYING); + + fail_unless (state_ret != GST_STATE_CHANGE_FAILURE); + + if (state_ret == GST_STATE_CHANGE_ASYNC) { + GST_LOG ("waiting for pipeline to reach PAUSED state"); + state_ret = gst_element_get_state (pipeline, NULL, NULL, -1); + fail_unless_equals_int (state_ret, GST_STATE_CHANGE_SUCCESS); + } + + /* Verify all frames from the test signal. */ + for (frame_num = 0; frame_num < info->num_aac_frames; ++frame_num) + check_parsed_aac_frame (info, frame_num); + + /* Check what duration is returned by a query. This duration must exclude + * the padding samples. */ + { + GstQuery *query; + gint64 duration; + GstFormat format; + + query = gst_query_new_duration (GST_FORMAT_TIME); + fail_unless (gst_element_query (pipeline, query)); + + gst_query_parse_duration (query, &format, &duration); + fail_unless_equals_int (format, GST_FORMAT_TIME); + fail_unless_equals_uint64 ((guint64) duration, + info->total_duration_without_padding); + + gst_query_unref (query); + } + + /* Seek tests: Here we seek to a certain position that corresponds to a + * certain frame. Then we check if we indeed got that frame. */ + + /* Seek back to the first frame. This will _not_ be the first valid frame. + * Instead, it will be a frame that gets only decoded and has duration + * zero. Other zero-duration frames may follow, until the first frame + * with valid data is encountered. This means that when the user seeks + * to position 0, downstream will subsequently get a number of buffers + * with PTS 0, and all of those buffers except the last will have a + * duration of 0. */ + { + fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED), + GST_STATE_CHANGE_SUCCESS); + gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH, 0); + fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING), + GST_STATE_CHANGE_SUCCESS); + + check_parsed_aac_frame (info, 0); + } + + /* Now move to the frame past the very first one that contained valid samples. + * This very first frame will usually be clipped, and be output as the last + * buffer at PTS 0 (see above). */ + { + GstClockTime position; + + position = + gst_util_uint64_scale_int (info->num_samples_in_first_valid_frame, + GST_SECOND, info->sample_rate); + + fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED), + GST_STATE_CHANGE_SUCCESS); + gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH, + position); + fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING), + GST_STATE_CHANGE_SUCCESS); + + check_parsed_aac_frame (info, info->first_frame_with_valid_samples + 1); + } + + /* Seek to the last frame with valid samples (= the first frame with padding + * samples at the end of the stream). */ + { + GstClockTime position; + + position = + gst_util_uint64_scale_int (info->num_samples_in_first_valid_frame + + info->num_samples_without_padding - info->num_samples_per_frame, + GST_SECOND, info->sample_rate); + + fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED), + GST_STATE_CHANGE_SUCCESS); + gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH, + position); + fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING), + GST_STATE_CHANGE_SUCCESS); + + check_parsed_aac_frame (info, info->last_frame_with_valid_samples); + } + + gst_element_set_state (pipeline, GST_STATE_NULL); + gst_object_unref (pipeline); +} + +GST_START_TEST (test_qtdemux_gapless_itunes_data) +{ + GaplessTestInfo info; + setup_gapless_itunes_test_info (&info); + perform_gapless_test (&info); +} + +GST_END_TEST; + +GST_START_TEST (test_qtdemux_gapless_nero_data_with_itunsmpb) +{ + GaplessTestInfo info; + setup_gapless_nero_with_itunsmpb_test_info (&info); + perform_gapless_test (&info); +} + +GST_END_TEST; + +GST_START_TEST (test_qtdemux_gapless_nero_data_without_itunsmpb) +{ + GaplessTestInfo info; + setup_gapless_nero_without_itunsmpb_test_info (&info); + perform_gapless_test (&info); +} + +GST_END_TEST; + static Suite * qtdemux_suite (void) { @@ -1215,6 +1630,9 @@ qtdemux_suite (void) tcase_add_test (tc_chain, test_qtdemux_pad_names); tcase_add_test (tc_chain, test_qtdemux_compensate_data_offset); tcase_add_test (tc_chain, test_qtdemux_mss_fragment); + tcase_add_test (tc_chain, test_qtdemux_gapless_itunes_data); + tcase_add_test (tc_chain, test_qtdemux_gapless_nero_data_with_itunsmpb); + tcase_add_test (tc_chain, test_qtdemux_gapless_nero_data_without_itunsmpb); return s; } diff --git a/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-itunes.m4a b/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-itunes.m4a Binary files differnew file mode 100644 index 0000000000..ddea727aa5 --- /dev/null +++ b/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-itunes.m4a diff --git a/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-with-itunsmpb.m4a b/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-with-itunsmpb.m4a Binary files differnew file mode 100644 index 0000000000..963e9cf2d3 --- /dev/null +++ b/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-with-itunsmpb.m4a diff --git a/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-without-itunsmpb.m4a b/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-without-itunsmpb.m4a Binary files differnew file mode 100644 index 0000000000..da9a618667 --- /dev/null +++ b/subprojects/gst-plugins-good/tests/files/sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-without-itunsmpb.m4a |