/* * Audio Toolbox system codecs * * copyright (c) 2016 rcombs * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include "config.h" #include "config_components.h" #include "avcodec.h" #include "ac3_parser_internal.h" #include "bytestream.h" #include "codec_internal.h" #include "decode.h" #include "mpegaudiodecheader.h" #include "libavutil/avassert.h" #include "libavutil/channel_layout.h" #include "libavutil/opt.h" #include "libavutil/log.h" #if __MAC_OS_X_VERSION_MIN_REQUIRED < 101100 #define kAudioFormatEnhancedAC3 'ec-3' #endif typedef struct ATDecodeContext { AVClass *av_class; AudioConverterRef converter; AudioStreamPacketDescription pkt_desc; AVPacket in_pkt; AVPacket new_in_pkt; char *decoded_data; int channel_map[64]; uint8_t *extradata; int extradata_size; int64_t last_pts; int eof; } ATDecodeContext; static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile) { switch (codec) { case AV_CODEC_ID_AAC: return kAudioFormatMPEG4AAC; case AV_CODEC_ID_AC3: return kAudioFormatAC3; case AV_CODEC_ID_ADPCM_IMA_QT: return kAudioFormatAppleIMA4; case AV_CODEC_ID_ALAC: return kAudioFormatAppleLossless; case AV_CODEC_ID_AMR_NB: return kAudioFormatAMR; case AV_CODEC_ID_EAC3: return kAudioFormatEnhancedAC3; case AV_CODEC_ID_GSM_MS: return kAudioFormatMicrosoftGSM; case AV_CODEC_ID_ILBC: return kAudioFormatiLBC; case AV_CODEC_ID_MP1: return kAudioFormatMPEGLayer1; case AV_CODEC_ID_MP2: return kAudioFormatMPEGLayer2; case AV_CODEC_ID_MP3: return kAudioFormatMPEGLayer3; case AV_CODEC_ID_PCM_ALAW: return kAudioFormatALaw; case AV_CODEC_ID_PCM_MULAW: return kAudioFormatULaw; case AV_CODEC_ID_QDMC: return kAudioFormatQDesign; case AV_CODEC_ID_QDM2: return kAudioFormatQDesign2; default: av_assert0(!"Invalid codec ID!"); return 0; } } static int ffat_get_channel_id(AudioChannelLabel label) { if (label == 0) return -1; else if (label <= kAudioChannelLabel_LFEScreen) return label - 1; else if (label <= kAudioChannelLabel_RightSurround) return label + 4; else if (label <= kAudioChannelLabel_CenterSurround) return label + 1; else if (label <= kAudioChannelLabel_RightSurroundDirect) return label + 23; else if (label <= kAudioChannelLabel_TopBackRight) return label - 1; else if (label < kAudioChannelLabel_RearSurroundLeft) return -1; else if (label <= kAudioChannelLabel_RearSurroundRight) return label - 29; else if (label <= kAudioChannelLabel_RightWide) return label - 4; else if (label == kAudioChannelLabel_LFE2) return ff_ctzll(AV_CH_LOW_FREQUENCY_2); else if (label == kAudioChannelLabel_Mono) return ff_ctzll(AV_CH_FRONT_CENTER); else return -1; } static int ffat_compare_channel_descriptions(const void* a, const void* b) { const AudioChannelDescription* da = a; const AudioChannelDescription* db = b; return ffat_get_channel_id(da->mChannelLabel) - ffat_get_channel_id(db->mChannelLabel); } static AudioChannelLayout *ffat_convert_layout(AudioChannelLayout *layout, UInt32* size) { AudioChannelLayoutTag tag = layout->mChannelLayoutTag; AudioChannelLayout *new_layout; if (tag == kAudioChannelLayoutTag_UseChannelDescriptions) return layout; else if (tag == kAudioChannelLayoutTag_UseChannelBitmap) AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForBitmap, sizeof(UInt32), &layout->mChannelBitmap, size); else AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForTag, sizeof(AudioChannelLayoutTag), &tag, size); new_layout = av_malloc(*size); if (!new_layout) { av_free(layout); return NULL; } if (tag == kAudioChannelLayoutTag_UseChannelBitmap) AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForBitmap, sizeof(UInt32), &layout->mChannelBitmap, size, new_layout); else AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForTag, sizeof(AudioChannelLayoutTag), &tag, size, new_layout); new_layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; av_free(layout); return new_layout; } static int ffat_update_ctx(AVCodecContext *avctx) { ATDecodeContext *at = avctx->priv_data; AudioStreamBasicDescription format; UInt32 size = sizeof(format); if (!AudioConverterGetProperty(at->converter, kAudioConverterCurrentInputStreamDescription, &size, &format)) { if (format.mSampleRate) avctx->sample_rate = format.mSampleRate; av_channel_layout_uninit(&avctx->ch_layout); av_channel_layout_default(&avctx->ch_layout, format.mChannelsPerFrame); avctx->frame_size = format.mFramesPerPacket; } if (!AudioConverterGetProperty(at->converter, kAudioConverterCurrentOutputStreamDescription, &size, &format)) { format.mSampleRate = avctx->sample_rate; format.mChannelsPerFrame = avctx->ch_layout.nb_channels; AudioConverterSetProperty(at->converter, kAudioConverterCurrentOutputStreamDescription, size, &format); } if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterOutputChannelLayout, &size, NULL) && size) { AudioChannelLayout *layout = av_malloc(size); uint64_t layout_mask = 0; int i; if (!layout) return AVERROR(ENOMEM); AudioConverterGetProperty(at->converter, kAudioConverterOutputChannelLayout, &size, layout); if (!(layout = ffat_convert_layout(layout, &size))) return AVERROR(ENOMEM); for (i = 0; i < layout->mNumberChannelDescriptions; i++) { int id = ffat_get_channel_id(layout->mChannelDescriptions[i].mChannelLabel); if (id < 0) goto done; if (layout_mask & (1 << id)) goto done; layout_mask |= 1 << id; layout->mChannelDescriptions[i].mChannelFlags = i; // Abusing flags as index } av_channel_layout_uninit(&avctx->ch_layout); av_channel_layout_from_mask(&avctx->ch_layout, layout_mask); qsort(layout->mChannelDescriptions, layout->mNumberChannelDescriptions, sizeof(AudioChannelDescription), &ffat_compare_channel_descriptions); for (i = 0; i < layout->mNumberChannelDescriptions; i++) at->channel_map[i] = layout->mChannelDescriptions[i].mChannelFlags; done: av_free(layout); } if (!avctx->frame_size) avctx->frame_size = 2048; return 0; } static void put_descr(PutByteContext *pb, int tag, unsigned int size) { int i = 3; bytestream2_put_byte(pb, tag); for (; i > 0; i--) bytestream2_put_byte(pb, (size >> (7 * i)) | 0x80); bytestream2_put_byte(pb, size & 0x7F); } static uint8_t* ffat_get_magic_cookie(AVCodecContext *avctx, UInt32 *cookie_size) { ATDecodeContext *at = avctx->priv_data; if (avctx->codec_id == AV_CODEC_ID_AAC) { char *extradata; PutByteContext pb; *cookie_size = 5 + 3 + 5+13 + 5+at->extradata_size; if (!(extradata = av_malloc(*cookie_size))) return NULL; bytestream2_init_writer(&pb, extradata, *cookie_size); // ES descriptor put_descr(&pb, 0x03, 3 + 5+13 + 5+at->extradata_size); bytestream2_put_be16(&pb, 0); bytestream2_put_byte(&pb, 0x00); // flags (= no flags) // DecoderConfig descriptor put_descr(&pb, 0x04, 13 + 5+at->extradata_size); // Object type indication bytestream2_put_byte(&pb, 0x40); bytestream2_put_byte(&pb, 0x15); // flags (= Audiostream) bytestream2_put_be24(&pb, 0); // Buffersize DB bytestream2_put_be32(&pb, 0); // maxbitrate bytestream2_put_be32(&pb, 0); // avgbitrate // DecoderSpecific info descriptor put_descr(&pb, 0x05, at->extradata_size); bytestream2_put_buffer(&pb, at->extradata, at->extradata_size); return extradata; } else { *cookie_size = at->extradata_size; return at->extradata; } } static av_cold int ffat_usable_extradata(AVCodecContext *avctx) { ATDecodeContext *at = avctx->priv_data; return at->extradata_size && (avctx->codec_id == AV_CODEC_ID_ALAC || avctx->codec_id == AV_CODEC_ID_QDM2 || avctx->codec_id == AV_CODEC_ID_QDMC || avctx->codec_id == AV_CODEC_ID_AAC); } static int ffat_set_extradata(AVCodecContext *avctx) { ATDecodeContext *at = avctx->priv_data; if (ffat_usable_extradata(avctx)) { OSStatus status; UInt32 cookie_size; uint8_t *cookie = ffat_get_magic_cookie(avctx, &cookie_size); if (!cookie) return AVERROR(ENOMEM); status = AudioConverterSetProperty(at->converter, kAudioConverterDecompressionMagicCookie, cookie_size, cookie); if (status != 0) av_log(avctx, AV_LOG_WARNING, "AudioToolbox cookie error: %i\n", (int)status); if (cookie != at->extradata) av_free(cookie); } return 0; } static av_cold int ffat_create_decoder(AVCodecContext *avctx, const AVPacket *pkt) { ATDecodeContext *at = avctx->priv_data; OSStatus status; int i; enum AVSampleFormat sample_fmt = (avctx->bits_per_raw_sample == 32) ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16; AudioStreamBasicDescription in_format = { .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile), .mBytesPerPacket = (avctx->codec_id == AV_CODEC_ID_ILBC) ? avctx->block_align : 0, }; AudioStreamBasicDescription out_format = { .mFormatID = kAudioFormatLinearPCM, .mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked, .mFramesPerPacket = 1, .mBitsPerChannel = av_get_bytes_per_sample(sample_fmt) * 8, }; avctx->sample_fmt = sample_fmt; if (ffat_usable_extradata(avctx)) { UInt32 format_size = sizeof(in_format); UInt32 cookie_size; uint8_t *cookie = ffat_get_magic_cookie(avctx, &cookie_size); if (!cookie) return AVERROR(ENOMEM); status = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, cookie_size, cookie, &format_size, &in_format); if (cookie != at->extradata) av_free(cookie); if (status != 0) { av_log(avctx, AV_LOG_ERROR, "AudioToolbox header-parse error: %i\n", (int)status); return AVERROR_UNKNOWN; } #if CONFIG_MP1_AT_DECODER || CONFIG_MP2_AT_DECODER || CONFIG_MP3_AT_DECODER } else if (pkt && pkt->size >= 4 && (avctx->codec_id == AV_CODEC_ID_MP1 || avctx->codec_id == AV_CODEC_ID_MP2 || avctx->codec_id == AV_CODEC_ID_MP3)) { enum AVCodecID codec_id; int bit_rate; if (ff_mpa_decode_header(AV_RB32(pkt->data), &avctx->sample_rate, &in_format.mChannelsPerFrame, &avctx->frame_size, &bit_rate, &codec_id) < 0) return AVERROR_INVALIDDATA; avctx->bit_rate = bit_rate; in_format.mSampleRate = avctx->sample_rate; #endif #if CONFIG_AC3_AT_DECODER || CONFIG_EAC3_AT_DECODER } else if (pkt && pkt->size >= 7 && (avctx->codec_id == AV_CODEC_ID_AC3 || avctx->codec_id == AV_CODEC_ID_EAC3)) { AC3HeaderInfo hdr; GetBitContext gbc; init_get_bits8(&gbc, pkt->data, pkt->size); if (ff_ac3_parse_header(&gbc, &hdr) < 0) return AVERROR_INVALIDDATA; in_format.mSampleRate = hdr.sample_rate; in_format.mChannelsPerFrame = hdr.channels; avctx->frame_size = hdr.num_blocks * 256; avctx->bit_rate = hdr.bit_rate; #endif } else { in_format.mSampleRate = avctx->sample_rate ? avctx->sample_rate : 44100; in_format.mChannelsPerFrame = avctx->ch_layout.nb_channels ? avctx->ch_layout.nb_channels : 1; } avctx->sample_rate = out_format.mSampleRate = in_format.mSampleRate; av_channel_layout_uninit(&avctx->ch_layout); avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC; avctx->ch_layout.nb_channels = out_format.mChannelsPerFrame = in_format.mChannelsPerFrame; out_format.mBytesPerFrame = out_format.mChannelsPerFrame * (out_format.mBitsPerChannel / 8); out_format.mBytesPerPacket = out_format.mBytesPerFrame * out_format.mFramesPerPacket; if (avctx->codec_id == AV_CODEC_ID_ADPCM_IMA_QT) in_format.mFramesPerPacket = 64; status = AudioConverterNew(&in_format, &out_format, &at->converter); if (status != 0) { av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status); return AVERROR_UNKNOWN; } if ((status = ffat_set_extradata(avctx)) < 0) return status; for (i = 0; i < (sizeof(at->channel_map) / sizeof(at->channel_map[0])); i++) at->channel_map[i] = i; ffat_update_ctx(avctx); if(!(at->decoded_data = av_malloc(av_get_bytes_per_sample(avctx->sample_fmt) * avctx->frame_size * avctx->ch_layout.nb_channels))) return AVERROR(ENOMEM); at->last_pts = AV_NOPTS_VALUE; return 0; } static av_cold int ffat_init_decoder(AVCodecContext *avctx) { ATDecodeContext *at = avctx->priv_data; if (avctx->extradata_size) { at->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); if (!at->extradata) return AVERROR(ENOMEM); at->extradata_size = avctx->extradata_size; memcpy(at->extradata, avctx->extradata, avctx->extradata_size); } if ((avctx->ch_layout.nb_channels && avctx->sample_rate) || ffat_usable_extradata(avctx)) return ffat_create_decoder(avctx, NULL); else return 0; } static OSStatus ffat_decode_callback(AudioConverterRef converter, UInt32 *nb_packets, AudioBufferList *data, AudioStreamPacketDescription **packets, void *inctx) { AVCodecContext *avctx = inctx; ATDecodeContext *at = avctx->priv_data; if (at->eof) { *nb_packets = 0; if (packets) { *packets = &at->pkt_desc; at->pkt_desc.mDataByteSize = 0; } return 0; } av_packet_unref(&at->in_pkt); av_packet_move_ref(&at->in_pkt, &at->new_in_pkt); if (!at->in_pkt.data) { *nb_packets = 0; return 1; } data->mNumberBuffers = 1; data->mBuffers[0].mNumberChannels = 0; data->mBuffers[0].mDataByteSize = at->in_pkt.size; data->mBuffers[0].mData = at->in_pkt.data; *nb_packets = 1; if (packets) { *packets = &at->pkt_desc; at->pkt_desc.mDataByteSize = at->in_pkt.size; } return 0; } #define COPY_SAMPLES(type) \ type *in_ptr = (type*)at->decoded_data; \ type *end_ptr = in_ptr + frame->nb_samples * avctx->ch_layout.nb_channels; \ type *out_ptr = (type*)frame->data[0]; \ for (; in_ptr < end_ptr; in_ptr += avctx->ch_layout.nb_channels, out_ptr += avctx->ch_layout.nb_channels) { \ int c; \ for (c = 0; c < avctx->ch_layout.nb_channels; c++) \ out_ptr[c] = in_ptr[at->channel_map[c]]; \ } static void ffat_copy_samples(AVCodecContext *avctx, AVFrame *frame) { ATDecodeContext *at = avctx->priv_data; if (avctx->sample_fmt == AV_SAMPLE_FMT_S32) { COPY_SAMPLES(int32_t); } else { COPY_SAMPLES(int16_t); } } static int ffat_decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt) { ATDecodeContext *at = avctx->priv_data; int pkt_size = avpkt->size; OSStatus ret; AudioBufferList out_buffers; if (avctx->codec_id == AV_CODEC_ID_AAC) { if (!at->extradata_size) { uint8_t *side_data; size_t side_data_size; side_data = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &side_data_size); if (side_data_size) { at->extradata = av_mallocz(side_data_size + AV_INPUT_BUFFER_PADDING_SIZE); if (!at->extradata) return AVERROR(ENOMEM); at->extradata_size = side_data_size; memcpy(at->extradata, side_data, side_data_size); } } } if (!at->converter) { if ((ret = ffat_create_decoder(avctx, avpkt)) < 0) { return ret; } } out_buffers = (AudioBufferList){ .mNumberBuffers = 1, .mBuffers = { { .mNumberChannels = avctx->ch_layout.nb_channels, .mDataByteSize = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->frame_size * avctx->ch_layout.nb_channels, } } }; av_packet_unref(&at->new_in_pkt); if (avpkt->size) { if ((ret = av_packet_ref(&at->new_in_pkt, avpkt)) < 0) { return ret; } } else { at->eof = 1; } frame->sample_rate = avctx->sample_rate; frame->nb_samples = avctx->frame_size; out_buffers.mBuffers[0].mData = at->decoded_data; ret = AudioConverterFillComplexBuffer(at->converter, ffat_decode_callback, avctx, &frame->nb_samples, &out_buffers, NULL); if ((!ret || ret == 1) && frame->nb_samples) { if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) return ret; ffat_copy_samples(avctx, frame); *got_frame_ptr = 1; if (at->last_pts != AV_NOPTS_VALUE) { frame->pts = at->last_pts; at->last_pts = avpkt->pts; } } else if (ret && ret != 1) { av_log(avctx, AV_LOG_WARNING, "Decode error: %i\n", ret); } else { at->last_pts = avpkt->pts; } return pkt_size; } static av_cold void ffat_decode_flush(AVCodecContext *avctx) { ATDecodeContext *at = avctx->priv_data; AudioConverterReset(at->converter); av_packet_unref(&at->new_in_pkt); av_packet_unref(&at->in_pkt); } static av_cold int ffat_close_decoder(AVCodecContext *avctx) { ATDecodeContext *at = avctx->priv_data; if (at->converter) AudioConverterDispose(at->converter); av_packet_unref(&at->new_in_pkt); av_packet_unref(&at->in_pkt); av_freep(&at->decoded_data); av_freep(&at->extradata); return 0; } #define FFAT_DEC_CLASS(NAME) \ static const AVClass ffat_##NAME##_dec_class = { \ .class_name = "at_" #NAME "_dec", \ .version = LIBAVUTIL_VERSION_INT, \ }; #define FFAT_DEC(NAME, ID, bsf_name) \ FFAT_DEC_CLASS(NAME) \ const FFCodec ff_##NAME##_at_decoder = { \ .p.name = #NAME "_at", \ CODEC_LONG_NAME(#NAME " (AudioToolbox)"), \ .p.type = AVMEDIA_TYPE_AUDIO, \ .p.id = ID, \ .priv_data_size = sizeof(ATDecodeContext), \ .init = ffat_init_decoder, \ .close = ffat_close_decoder, \ FF_CODEC_DECODE_CB(ffat_decode), \ .flush = ffat_decode_flush, \ .p.priv_class = &ffat_##NAME##_dec_class, \ .bsfs = bsf_name, \ .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_CHANNEL_CONF, \ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, \ .p.wrapper_name = "at", \ }; FFAT_DEC(aac, AV_CODEC_ID_AAC, "aac_adtstoasc") FFAT_DEC(ac3, AV_CODEC_ID_AC3, NULL) FFAT_DEC(adpcm_ima_qt, AV_CODEC_ID_ADPCM_IMA_QT, NULL) FFAT_DEC(alac, AV_CODEC_ID_ALAC, NULL) FFAT_DEC(amr_nb, AV_CODEC_ID_AMR_NB, NULL) FFAT_DEC(eac3, AV_CODEC_ID_EAC3, NULL) FFAT_DEC(gsm_ms, AV_CODEC_ID_GSM_MS, NULL) FFAT_DEC(ilbc, AV_CODEC_ID_ILBC, NULL) FFAT_DEC(mp1, AV_CODEC_ID_MP1, NULL) FFAT_DEC(mp2, AV_CODEC_ID_MP2, NULL) FFAT_DEC(mp3, AV_CODEC_ID_MP3, NULL) FFAT_DEC(pcm_alaw, AV_CODEC_ID_PCM_ALAW, NULL) FFAT_DEC(pcm_mulaw, AV_CODEC_ID_PCM_MULAW, NULL) FFAT_DEC(qdmc, AV_CODEC_ID_QDMC, NULL) FFAT_DEC(qdm2, AV_CODEC_ID_QDM2, NULL)