diff options
Diffstat (limited to 'libavformat/tedcaptionsdec.c')
-rw-r--r-- | libavformat/tedcaptionsdec.c | 366 |
1 files changed, 366 insertions, 0 deletions
diff --git a/libavformat/tedcaptionsdec.c b/libavformat/tedcaptionsdec.c new file mode 100644 index 0000000000..68063fafbe --- /dev/null +++ b/libavformat/tedcaptionsdec.c @@ -0,0 +1,366 @@ +/* + * TED Talks captions format decoder + * Copyright (c) 2012 Nicolas George + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/bprint.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "avformat.h" +#include "internal.h" +#include "subtitles.h" + +typedef struct { + AVClass *class; + int64_t start_time; + FFDemuxSubtitlesQueue subs; +} TEDCaptionsDemuxer; + +static const AVOption tedcaptions_options[] = { + { "start_time", "set the start time (offset) of the subtitles, in ms", + offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64, + { .i64 = 15000 }, INT64_MIN, INT64_MAX, + AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +static const AVClass tedcaptions_demuxer_class = { + .class_name = "tedcaptions_demuxer", + .item_name = av_default_item_name, + .option = tedcaptions_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin)) + +#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'z')) +#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10) +#define ERR_CODE(c) (c < 0 ? c : AVERROR_INVALIDDATA) + +static void av_bprint_utf8(AVBPrint *bp, unsigned c) +{ + int bytes, i; + + if (c <= 0x7F) { + av_bprint_chars(bp, c, 1); + return; + } + bytes = (av_log2(c) - 2) / 5; + av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1); + for (i = bytes - 1; i >= 0; i--) + av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1); +} + +static void next_byte(AVIOContext *pb, int *cur_byte) +{ + uint8_t b; + int ret = avio_read(pb, &b, 1); + *cur_byte = ret > 0 ? b : ret == 0 ? AVERROR_EOF : ret; +} + +static void skip_spaces(AVIOContext *pb, int *cur_byte) +{ + while (*cur_byte == ' ' || *cur_byte == '\t' || + *cur_byte == '\n' || *cur_byte == '\r') + next_byte(pb, cur_byte); +} + +static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c) +{ + skip_spaces(pb, cur_byte); + if (*cur_byte != c) + return ERR_CODE(*cur_byte); + next_byte(pb, cur_byte); + return 0; +} + +static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full) +{ + int ret; + + av_bprint_init(bp, 0, full ? -1 : 1); + ret = expect_byte(pb, cur_byte, '"'); + if (ret < 0) + goto fail; + while (*cur_byte > 0 && *cur_byte != '"') { + if (*cur_byte == '\\') { + next_byte(pb, cur_byte); + if (*cur_byte < 0) { + ret = AVERROR_INVALIDDATA; + goto fail; + } + if ((*cur_byte | 32) == 'u') { + unsigned chr = 0, i; + for (i = 0; i < 4; i++) { + next_byte(pb, cur_byte); + if (!HEX_DIGIT_TEST(*cur_byte)) { + ret = ERR_CODE(*cur_byte); + goto fail; + } + chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte); + } + av_bprint_utf8(bp, chr); + } else { + av_bprint_chars(bp, *cur_byte, 1); + } + } else { + av_bprint_chars(bp, *cur_byte, 1); + } + next_byte(pb, cur_byte); + } + ret = expect_byte(pb, cur_byte, '"'); + if (ret < 0) + goto fail; + if (full && !av_bprint_is_complete(bp)) { + ret = AVERROR(ENOMEM); + goto fail; + } + return 0; + +fail: + av_bprint_finalize(bp, NULL); + return ret; +} + +static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp) +{ + int ret; + + ret = parse_string(pb, cur_byte, bp, 0); + if (ret < 0) + return ret; + ret = expect_byte(pb, cur_byte, ':'); + if (ret < 0) + return ret; + return 0; +} + +static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result) +{ + static const char * const text[] = { "false", "true" }; + const char *p; + int i; + + skip_spaces(pb, cur_byte); + for (i = 0; i < 2; i++) { + p = text[i]; + if (*cur_byte != *p) + continue; + for (; *p; p++, next_byte(pb, cur_byte)) + if (*cur_byte != *p) + return AVERROR_INVALIDDATA; + if (BETWEEN(*cur_byte | 32, 'a', 'z')) + return AVERROR_INVALIDDATA; + *result = i; + return 0; + } + return AVERROR_INVALIDDATA; +} + +static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result) +{ + int64_t val = 0; + + skip_spaces(pb, cur_byte); + if ((unsigned)*cur_byte - '0' > 9) + return AVERROR_INVALIDDATA; + while (BETWEEN(*cur_byte, '0', '9')) { + val = val * 10 + (*cur_byte - '0'); + next_byte(pb, cur_byte); + } + *result = val; + return 0; +} + +static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs) +{ + int ret, cur_byte, start_of_par; + AVBPrint label, content; + int64_t pos, start, duration; + AVPacket *pkt; + + next_byte(pb, &cur_byte); + ret = expect_byte(pb, &cur_byte, '{'); + if (ret < 0) + return AVERROR_INVALIDDATA; + ret = parse_label(pb, &cur_byte, &label); + if (ret < 0 || strcmp(label.str, "captions")) + return AVERROR_INVALIDDATA; + ret = expect_byte(pb, &cur_byte, '['); + if (ret < 0) + return AVERROR_INVALIDDATA; + while (1) { + content.size = 0; + start = duration = AV_NOPTS_VALUE; + ret = expect_byte(pb, &cur_byte, '{'); + if (ret < 0) + return ret; + pos = avio_tell(pb) - 1; + while (1) { + ret = parse_label(pb, &cur_byte, &label); + if (ret < 0) + return ret; + if (!strcmp(label.str, "startOfParagraph")) { + ret = parse_boolean(pb, &cur_byte, &start_of_par); + if (ret < 0) + return ret; + } else if (!strcmp(label.str, "content")) { + ret = parse_string(pb, &cur_byte, &content, 1); + if (ret < 0) + return ret; + } else if (!strcmp(label.str, "startTime")) { + ret = parse_int(pb, &cur_byte, &start); + if (ret < 0) + return ret; + } else if (!strcmp(label.str, "duration")) { + ret = parse_int(pb, &cur_byte, &duration); + if (ret < 0) + return ret; + } else { + return AVERROR_INVALIDDATA; + } + skip_spaces(pb, &cur_byte); + if (cur_byte != ',') + break; + next_byte(pb, &cur_byte); + } + ret = expect_byte(pb, &cur_byte, '}'); + if (ret < 0) + return ret; + + if (!content.size || start == AV_NOPTS_VALUE || + duration == AV_NOPTS_VALUE) + return AVERROR_INVALIDDATA; + pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0); + if (!pkt) + return AVERROR(ENOMEM); + pkt->pos = pos; + pkt->pts = start; + pkt->duration = duration; + av_bprint_finalize(&content, NULL); + + skip_spaces(pb, &cur_byte); + if (cur_byte != ',') + break; + next_byte(pb, &cur_byte); + } + ret = expect_byte(pb, &cur_byte, ']'); + if (ret < 0) + return ret; + ret = expect_byte(pb, &cur_byte, '}'); + if (ret < 0) + return ret; + skip_spaces(pb, &cur_byte); + if (cur_byte != AVERROR_EOF) + return ERR_CODE(cur_byte); + return 0; +} + +static av_cold int tedcaptions_read_header(AVFormatContext *avf) +{ + TEDCaptionsDemuxer *tc = avf->priv_data; + AVStream *st; + int ret, i; + AVPacket *last; + + ret = parse_file(avf->pb, &tc->subs); + if (ret < 0) { + if (ret == AVERROR_INVALIDDATA) + av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n", + avio_tell(avf->pb)); + ff_subtitles_queue_clean(&tc->subs); + return ret; + } + ff_subtitles_queue_finalize(&tc->subs); + for (i = 0; i < tc->subs.nb_subs; i++) + tc->subs.subs[i].pts += tc->start_time; + + last = &tc->subs.subs[tc->subs.nb_subs - 1]; + st = avformat_new_stream(avf, NULL); + if (!st) + return AVERROR(ENOMEM); + st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE; + st->codec->codec_id = AV_CODEC_ID_TEXT; + avpriv_set_pts_info(st, 64, 1, 1000); + st->probe_packets = 0; + st->start_time = 0; + st->duration = last->pts + last->duration; + st->cur_dts = 0; + + return 0; +} + +static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet) +{ + TEDCaptionsDemuxer *tc = avf->priv_data; + + return ff_subtitles_queue_read_packet(&tc->subs, packet); +} + +static int tedcaptions_read_close(AVFormatContext *avf) +{ + TEDCaptionsDemuxer *tc = avf->priv_data; + + ff_subtitles_queue_clean(&tc->subs); + return 0; +} + +static av_cold int tedcaptions_read_probe(AVProbeData *p) +{ + static const char *const tags[] = { + "\"captions\"", "\"duration\"", "\"content\"", + "\"startOfParagraph\"", "\"startTime\"", + }; + unsigned i, count = 0; + const char *t; + + if (p->buf[strspn(p->buf, " \t\r\n")] != '{') + return 0; + for (i = 0; i < FF_ARRAY_ELEMS(tags); i++) { + if (!(t = strstr(p->buf, tags[i]))) + continue; + t += strlen(tags[i]); + t += strspn(t, " \t\r\n"); + if (*t == ':') + count++; + } + return count == FF_ARRAY_ELEMS(tags) ? AVPROBE_SCORE_MAX : + count ? AVPROBE_SCORE_EXTENSION : 0; +} + +static int tedcaptions_read_seek(AVFormatContext *avf, int stream_index, + int64_t min_ts, int64_t ts, int64_t max_ts, + int flags) +{ + TEDCaptionsDemuxer *tc = avf->priv_data; + return ff_subtitles_queue_seek(&tc->subs, avf, stream_index, + min_ts, ts, max_ts, flags); +} + +AVInputFormat ff_tedcaptions_demuxer = { + .name = "tedcaptions", + .long_name = NULL_IF_CONFIG_SMALL("TED Talks captions"), + .priv_data_size = sizeof(TEDCaptionsDemuxer), + .priv_class = &tedcaptions_demuxer_class, + .read_header = tedcaptions_read_header, + .read_packet = tedcaptions_read_packet, + .read_close = tedcaptions_read_close, + .read_probe = tedcaptions_read_probe, + .read_seek2 = tedcaptions_read_seek, +}; |