summaryrefslogtreecommitdiff
path: root/libavformat/subtitles.h
blob: 903c24d9dfcce5519045e4d80542460ac0070419 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/*
 * Copyright (c) 2012 Clément Bœsch
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVFORMAT_SUBTITLES_H
#define AVFORMAT_SUBTITLES_H

#include <stdint.h>
#include <stddef.h>
#include "avformat.h"
#include "libavutil/bprint.h"

/**
 * Ordering criteria for the packets of a FFDemuxSubtitlesQueue; selected
 * through the queue's sort field.
 */
enum sub_sort {
    SUB_SORT_TS_POS = 0,    ///< sort by timestamps, then position
    SUB_SORT_POS_TS,        ///< sort by position, then timestamps
};

/**
 * Text encodings of a source stream.
 *
 * NOTE(review): presumably the values stored in FFTextReader.type after BOM
 * detection -- confirm against the implementation.
 */
enum ff_utf_type {
    FF_UTF_8,       // or other 8 bit encodings
    FF_UTF16LE,
    FF_UTF16BE,
};

/**
 * State of a text reader, set up with ff_text_init_avio() or
 * ff_text_init_buf(). The field roles are not visible from this header; the
 * notes below are assumptions to be confirmed against the implementation.
 */
typedef struct {
    // NOTE(review): presumably an enum ff_utf_type value -- confirm
    int type;
    // stream data is read from (see ff_text_init_avio())
    AVIOContext *pb;
    // NOTE(review): presumably pending bytes not yet handed out, with
    // buf_pos / buf_len as read cursor and fill level -- confirm
    unsigned char buf[8];
    int buf_pos, buf_len;
    // NOTE(review): presumably the context used by ff_text_init_buf() to read
    // from a memory buffer -- confirm
    AVIOContext buf_pb;
} FFTextReader;

/**
 * Initialize the FFTextReader from the given AVIOContext. This function will
 * read some bytes from pb, and test for UTF-8 or UTF-16 BOMs. Further accesses
 * to FFTextReader will read more data from pb.
 *
 * The purpose of FFTextReader is to transparently convert read data to UTF-8
 * if the stream had a UTF-16 BOM.
 *
 * To read from a memory buffer instead of a stream, see ff_text_init_buf().
 *
 * @param r object which will be initialized
 * @param pb stream to read from (referenced as long as FFTextReader is in use)
 */
void ff_text_init_avio(FFTextReader *r, AVIOContext *pb);

/**
 * Similar to ff_text_init_avio(), but sets it up to read from a bounded buffer.
 *
 * @param r object which will be initialized
 * @param buf buffer to read from (referenced as long as FFTextReader is in use)
 * @param size size of buf, in bytes
 */
void ff_text_init_buf(FFTextReader *r, void *buf, size_t size);

/**
 * Return the byte position of the next byte returned by ff_text_r8(). For
 * UTF-16 source streams, this will return the original position, but it will
 * be incorrect if a codepoint was only partially read with ff_text_r8().
 *
 * @param r reader to query
 * @return the byte position described above
 */
int64_t ff_text_pos(FFTextReader *r);

/**
 * Return the next byte. The return value is always 0 - 255. Returns 0 on EOF.
 * If the source stream is UTF-16, this reads from the stream converted to
 * UTF-8. On invalid UTF-16, 0 is returned.
 *
 * @note A return value of 0 does not by itself tell EOF apart from invalid
 *       UTF-16; use ff_text_eof() to check whether EOF was reached.
 *
 * @param r reader to read from
 */
int ff_text_r8(FFTextReader *r);

/**
 * Return non-zero if EOF was reached.
 *
 * @param r reader to query
 */
int ff_text_eof(FFTextReader *r);

/**
 * Like ff_text_r8(), but does not remove the byte from the buffer, so the
 * byte stays available to subsequent reads.
 *
 * @param r reader to peek into
 */
int ff_text_peek_r8(FFTextReader *r);

/**
 * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are
 * written.
 *
 * @param r    reader to read from
 * @param buf  destination for the bytes read
 * @param size number of bytes to read
 *
 * NOTE(review): nothing here states that buf gets a terminating \0 after the
 * size bytes -- confirm before treating buf as a C string.
 */
void ff_text_read(FFTextReader *r, char *buf, size_t size);

/**
 * Queue of subtitle packets shared by the subtitle demuxers: events are added
 * with ff_subtitles_queue_insert(), the queue is completed with
 * ff_subtitles_queue_finalize(), and packets are then served through
 * ff_subtitles_queue_read_packet() / ff_subtitles_queue_seek().
 */
typedef struct {
    AVPacket *subs;         ///< array of subtitles packets
    int nb_subs;            ///< number of subtitles packets
    int allocated_size;     ///< allocated size for subs
    int current_sub_idx;    ///< current position for the read packet callback
    enum sub_sort sort;     ///< sort method to use when finalizing subtitles
} FFDemuxSubtitlesQueue;

/**
 * Insert a new subtitle event.
 *
 * @param q     the queue to insert into
 * @param event the subtitle line, may not be zero terminated
 * @param len   the length of the event (in strlen() sense, so without '\0')
 * @param merge set to 1 if the current event should be concatenated with the
 *              previous one instead of adding a new entry, 0 otherwise
 *
 * NOTE(review): the return value is not documented here; presumably the
 * packet holding the event, or NULL on failure -- confirm against the
 * implementation.
 */
AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
                                    const uint8_t *event, int len, int merge);

/**
 * Set missing durations and sort the subtitles using the method selected by
 * q->sort: by timestamps then position (SUB_SORT_TS_POS, the zero value), or
 * by position then timestamps (SUB_SORT_POS_TS).
 *
 * @param q the queue to finalize
 */
void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q);

/**
 * Generic read_packet() callback for subtitles demuxers using this queue
 * system.
 *
 * @param q   the queue to read from (its current_sub_idx is the read position)
 * @param pkt the packet to fill
 *
 * NOTE(review): the return convention is not visible here; presumably 0 on
 * success and a negative AVERROR code otherwise -- confirm.
 */
int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt);

/**
 * Update current_sub_idx to emulate a seek. Except for the first parameter,
 * the signature matches the AVInputFormat->read_seek2 prototype.
 *
 * @param q the queue whose read position is updated
 */
int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
                            int64_t min_ts, int64_t ts, int64_t max_ts, int flags);

/**
 * Remove and destroy all the subtitles packets.
 *
 * @param q the queue to empty
 */
void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q);

/**
 * SMIL helper to load next chunk ("<...>" or untagged content) in buf.
 *
 * @param tr  text reader to read the chunk from
 * @param buf buffer receiving the chunk
 * @param c   cached character, to avoid a backward seek
 *
 * NOTE(review): the meaning of the return value is not documented here --
 * confirm against the implementation.
 */
int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c);

/**
 * SMIL helper to point to the value of an attribute in the given tag.
 *
 * @param s    SMIL tag ("<...>")
 * @param attr the attribute to look for
 *
 * NOTE(review): the result when attr is not present is not documented here;
 * presumably NULL -- confirm.
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr);

/**
 * @brief Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext.
 *
 * @param pb  I/O context to read from
 * @param buf an initialized buf where the chunk is written
 */
void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf);

/**
 * @brief Read a subtitles chunk from FFTextReader.
 *
 * A chunk is defined by a multiline "event", ending with a second line break.
 * The trailing line breaks are trimmed. CRLF are supported.
 * Example: "foo\r\nbar\r\n\r\nnext" will print "foo\r\nbar" into buf, and tr
 * will be positioned on the 'n' of the "next" string.
 *
 * @param tr  I/O context
 * @param buf an initialized buf where the chunk is written
 *
 * @note buf is cleared before writing into it.
 */
void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf);

/**
 * Compute how far to advance ptr to reach the start of the next line, or the
 * terminating '\0' when the string holds no further line break.
 *
 * Recognized line terminators: LF, CRLF (MS) and a standalone CR (old MacOS).
 * At most one terminator is consumed, so an empty line that follows is left
 * for the next call.
 *
 * @param ptr zero-terminated string positioned somewhere in a line
 * @return number of characters to skip, line terminator included
 */
static av_always_inline int ff_subtitles_next_line(const char *ptr)
{
    /* Length of the line content, i.e. everything before CR, LF or '\0'. */
    int skip = strcspn(ptr, "\r\n");

    /* Step over a CR, whether it stands alone or opens a CRLF pair... */
    skip += ptr[skip] == '\r';
    /* ...then over an LF (standalone, or the second half of CRLF). */
    skip += ptr[skip] == '\n';

    return skip;
}

/**
 * Read a line of text. Discards line ending characters.
 * The function handles the following line breaks schemes:
 * LF, CRLF (MS), or standalone CR (old MacOS).
 *
 * Returns the number of bytes written to buf. Always writes a terminating 0,
 * similarly to snprintf.
 *
 * @param tr   text reader to read the line from
 * @param buf  destination buffer for the line
 * @param size size of buf
 *
 * @note returns a negative error code if a \0 byte is found
 */
ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size);

#endif /* AVFORMAT_SUBTITLES_H */