diff options
-rw-r--r-- | doc/APIchanges | 5 | ||||
-rw-r--r-- | doc/multithreading.txt | 65 | ||||
-rw-r--r-- | ffplay.c | 1 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 89 | ||||
-rw-r--r-- | libavcodec/options.c | 4 | ||||
-rw-r--r-- | libavcodec/pthread.c | 722 | ||||
-rw-r--r-- | libavcodec/thread.h | 111 | ||||
-rw-r--r-- | libavcodec/utils.c | 60 | ||||
-rw-r--r-- | libavcodec/w32thread.c | 6 | ||||
-rw-r--r-- | libavformat/utils.c | 6 | ||||
-rw-r--r-- | libavutil/internal.h | 11 |
11 files changed, 1068 insertions, 12 deletions
diff --git a/doc/APIchanges b/doc/APIchanges index c88d4d2db3..f14616e16c 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,6 +13,11 @@ libavutil: 2009-03-08 API changes, most recent first: +2011-02-09 - XXXXXXX - lavc 52.111.0 - threading API + Add CODEC_CAP_FRAME_THREADS with new restrictions on get_buffer()/ + release_buffer()/draw_horiz_band() callbacks for appropriate codecs. + Add thread_type and active_thread_type fields to AVCodecContext. + 2011-02-08 - 3940caa - lavf 52.98.0 - av_probe_input_buffer Add av_probe_input_buffer() to avformat.h for probing format from a ByteIOContext. diff --git a/doc/multithreading.txt b/doc/multithreading.txt new file mode 100644 index 0000000000..a1068425cd --- /dev/null +++ b/doc/multithreading.txt @@ -0,0 +1,65 @@ +FFmpeg multithreading methods +============================================== + +FFmpeg provides two methods for multithreading codecs. + +Slice threading decodes multiple parts of a frame at the same time, using +AVCodecContext execute() and execute2(). + +Frame threading decodes multiple frames at the same time. +It accepts N future frames and delays decoded pictures by N-1 frames. +The later frames are decoded in separate threads while the user is +displaying the current one. + +Restrictions on clients +============================================== + +Slice threading - +* The client's draw_horiz_band() must be thread-safe according to the comment + in avcodec.h. + +Frame threading - +* Restrictions with slice threading also apply. +* For best performance, the client should set thread_safe_callbacks if it + provides a thread-safe get_buffer() callback. +* There is one frame of delay added for every thread beyond the first one. + Clients must be able to handle this; the pkt_dts and pkt_pts fields in + AVFrame will work as usual. 
+ +Restrictions on codec implementations +============================================== + +Slice threading - + None except that there must be something worth executing in parallel. + +Frame threading - +* Codecs can only accept entire pictures per packet. +* Codecs similar to ffv1, whose streams don't reset across frames, + will not work because their bitstreams cannot be decoded in parallel. + +* The contents of buffers must not be read before ff_thread_await_progress() + has been called on them. reget_buffer() and buffer age optimizations no longer work. +* The contents of buffers must not be written to after ff_thread_report_progress() + has been called on them. This includes draw_edges(). + +Porting codecs to frame threading +============================================== + +Find all context variables that are needed by the next frame. Move all +code changing them, as well as code calling get_buffer(), up to before +the decode process starts. Call ff_thread_finish_setup() afterwards. If +some code can't be moved, have update_thread_context() run it in the next +thread. + +If the codec allocates writable tables in its init(), add an init_thread_copy() +which re-allocates them for other threads. + +Add CODEC_CAP_FRAME_THREADS to the codec capabilities. There will be very little +speed gain at this point but it should work. + +Call ff_thread_report_progress() after some part of the current picture has decoded. +A good place to put this is where draw_horiz_band() is called - add this if it isn't +called anywhere, as it's useful too and the implementation is trivial when you're +doing this. Note that draw_edges() needs to be called before reporting progress. + +Before accessing a reference frame or its MVs, call ff_thread_await_progress(). 
@@ -1694,6 +1694,7 @@ static int input_init(AVFilterContext *ctx, const char *args, void *opaque) codec->get_buffer = input_get_buffer; codec->release_buffer = input_release_buffer; codec->reget_buffer = input_reget_buffer; + codec->thread_safe_callbacks = 1; } priv->frame = avcodec_alloc_frame(); diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index c231804b34..9692288a16 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -32,7 +32,7 @@ #include "libavutil/cpu.h" #define LIBAVCODEC_VERSION_MAJOR 52 -#define LIBAVCODEC_VERSION_MINOR 110 +#define LIBAVCODEC_VERSION_MINOR 111 #define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ @@ -728,6 +728,10 @@ typedef struct RcOverride{ * Codec is able to deal with negative linesizes */ #define CODEC_CAP_NEG_LINESIZES 0x0800 +/** + * Codec supports frame-level multithreading. + */ +#define CODEC_CAP_FRAME_THREADS 0x1000 //The following defines may change, don't expect compatibility if you use them. #define MB_TYPE_INTRA4x4 0x0001 @@ -1027,7 +1031,20 @@ typedef struct AVPanScan{ * - decoding: Read by user.\ */\ int64_t pkt_dts;\ - +\ + /**\ + * the AVCodecContext which ff_thread_get_buffer() was last called on\ + * - encoding: Set by libavcodec.\ + * - decoding: Set by libavcodec.\ + */\ + struct AVCodecContext *owner;\ +\ + /**\ + * used by multithreading to store frame-specific info\ + * - encoding: Set by libavcodec.\ + * - decoding: Set by libavcodec.\ + */\ + void *thread_opaque; #define FF_QSCALE_TYPE_MPEG1 0 #define FF_QSCALE_TYPE_MPEG2 1 @@ -1239,6 +1256,10 @@ typedef struct AVCodecContext { * decoder to draw a horizontal band. It improves cache usage. Not * all codecs can do that. You must check the codec capabilities * beforehand. 
+ * When multithreading is used, it may be called from multiple threads + * at the same time; threads might draw different parts of the same AVFrame, + * or multiple AVFrames, and there is no guarantee that slices will be drawn + * in order. * The function is also used by hardware acceleration APIs. * It is called at least once during frame decoding to pass * the data needed for hardware render. @@ -1492,6 +1513,9 @@ typedef struct AVCodecContext { * if CODEC_CAP_DR1 is not set then get_buffer() must call * avcodec_default_get_buffer() instead of providing buffers allocated by * some other means. + * If frame multithreading is used and thread_safe_callbacks is set, + * it may be called from a different thread, but not from more than one at once. + * Does not need to be reentrant. * - encoding: unused * - decoding: Set by libavcodec, user can override. */ @@ -1501,6 +1525,8 @@ typedef struct AVCodecContext { * Called to release buffers which were allocated with get_buffer. * A released buffer can be reused in get_buffer(). * pic.data[*] must be set to NULL. + * May be called from a different thread if frame multithreading is used, + * but not by more than one thread at once, so does not need to be reentrant. * - encoding: unused * - decoding: Set by libavcodec, user can override. */ @@ -1804,6 +1830,7 @@ typedef struct AVCodecContext { #define FF_DEBUG_VIS_QP 0x00002000 #define FF_DEBUG_VIS_MB_TYPE 0x00004000 #define FF_DEBUG_BUFFERS 0x00008000 +#define FF_DEBUG_THREADS 0x00010000 /** * debug @@ -2827,6 +2854,44 @@ typedef struct AVCodecContext { * - encoding: unused */ AVPacket *pkt; + + /** + * Whether this is a copy of the context which had init() called on it. + * This is used by multithreading - shared tables and picture pointers + * should be freed from the original context only. + * - encoding: Set by libavcodec. + * - decoding: Set by libavcodec. + */ + int is_copy; + + /** + * Which multithreading methods to use. 
+ * Use of FF_THREAD_FRAME will increase decoding delay by one frame per thread, + * so clients which cannot provide future frames should not use it. + * + * - encoding: Set by user, otherwise the default is used. + * - decoding: Set by user, otherwise the default is used. + */ + int thread_type; +#define FF_THREAD_FRAME 1 ///< Decode more than one frame at once +#define FF_THREAD_SLICE 2 ///< Decode more than one part of a single frame at once + + /** + * Which multithreading methods are in use by the codec. + * - encoding: Set by libavcodec. + * - decoding: Set by libavcodec. + */ + int active_thread_type; + + /** + * Set by the client if its custom get_buffer() callback can be called + * from another thread, which allows faster multithreaded decoding. + * draw_horiz_band() will be called from other threads regardless of this setting. + * Ignored if the default get_buffer() is used. + * - encoding: Set by user. + * - decoding: Set by user. + */ + int thread_safe_callbacks; } AVCodecContext; /** @@ -2879,6 +2944,26 @@ typedef struct AVCodec { uint8_t max_lowres; ///< maximum value for lowres supported by the decoder AVClass *priv_class; ///< AVClass for the private context const AVProfile *profiles; ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN} + + /** + * @defgroup framethreading Frame-level threading support functions. + * @{ + */ + /** + * If defined, called on thread contexts when they are created. + * If the codec allocates writable tables in init(), re-allocate them here. + * priv_data will be set to a copy of the original. + */ + int (*init_thread_copy)(AVCodecContext *); + /** + * Copy necessary context variables from a previous thread context to the current one. + * If not defined, the next thread will start automatically; otherwise, the codec + * must call ff_thread_finish_setup(). + * + * dst and src will (rarely) point to the same context, in which case memcpy should be skipped. 
+ */ + int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src); + /** @} */ } AVCodec; /** diff --git a/libavcodec/options.c b/libavcodec/options.c index ff4cc2dad1..57e0804ce5 100644 --- a/libavcodec/options.c +++ b/libavcodec/options.c @@ -250,6 +250,7 @@ static const AVOption options[]={ {"vis_qp", "visualize quantization parameter (QP), lower QP are tinted greener", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_QP, INT_MIN, INT_MAX, V|D, "debug"}, {"vis_mb_type", "visualize block types", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MB_TYPE, INT_MIN, INT_MAX, V|D, "debug"}, {"buffers", "picture buffer allocations", 0, FF_OPT_TYPE_CONST, FF_DEBUG_BUFFERS, INT_MIN, INT_MAX, V|D, "debug"}, +{"thread_ops", "threading operations", 0, FF_OPT_TYPE_CONST, FF_DEBUG_THREADS, INT_MIN, INT_MAX, V|D, "debug"}, {"vismv", "visualize motion vectors (MVs)", OFFSET(debug_mv), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|D, "debug_mv"}, {"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, @@ -431,6 +432,9 @@ static const AVOption options[]={ {"cholesky", NULL, 0, FF_OPT_TYPE_CONST, AV_LPC_TYPE_CHOLESKY, INT_MIN, INT_MAX, A|E, "lpc_type"}, {"lpc_passes", "number of passes to use for Cholesky factorization during LPC analysis", OFFSET(lpc_passes), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {"slices", "number of slices, used in parallelized decoding", OFFSET(slices), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E}, +{"thread_type", "select multithreading type", OFFSET(thread_type), FF_OPT_TYPE_INT, FF_THREAD_SLICE|FF_THREAD_FRAME, 0, INT_MAX, V|E|D, "thread_type"}, +{"slice", NULL, 0, FF_OPT_TYPE_CONST, FF_THREAD_SLICE, INT_MIN, INT_MAX, V|E|D, "thread_type"}, +{"frame", NULL, 0, FF_OPT_TYPE_CONST, FF_THREAD_FRAME, INT_MIN, INT_MAX, V|E|D, "thread_type"}, {NULL}, }; diff --git a/libavcodec/pthread.c 
b/libavcodec/pthread.c index 1628b21a1f..00e419bf0c 100644 --- a/libavcodec/pthread.c +++ b/libavcodec/pthread.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Roman Shaposhnik + * Copyright (c) 2008 Alexander Strange (astrange@ithinksw.com) * * Many thanks to Steven M. Schultz for providing clever ideas and * to Michael Niedermayer <michaelni@gmx.at> for writing initial @@ -21,9 +22,17 @@ * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ + +/** + * @file + * Multithreading support functions + * @see doc/multithreading.txt + */ + #include <pthread.h> #include "avcodec.h" +#include "thread.h" typedef int (action_func)(AVCodecContext *c, void *arg); typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr); @@ -45,6 +54,78 @@ typedef struct ThreadContext { int done; } ThreadContext; +/// Max number of frame buffers that can be allocated when using frame threads. +#define MAX_BUFFERS 32 + +/** + * Context used by codec threads and stored in their AVCodecContext thread_opaque. + */ +typedef struct PerThreadContext { + struct FrameThreadContext *parent; + + pthread_t thread; + pthread_cond_t input_cond; ///< Used to wait for a new packet from the main thread. + pthread_cond_t progress_cond; ///< Used by child threads to wait for progress to change. + pthread_cond_t output_cond; ///< Used by the main thread to wait for frames to finish. + + pthread_mutex_t mutex; ///< Mutex used to protect the contents of the PerThreadContext. + pthread_mutex_t progress_mutex; ///< Mutex used to protect frame progress values and progress_cond. + + AVCodecContext *avctx; ///< Context used to decode packets passed to this thread. + + AVPacket avpkt; ///< Input packet (for decoding) or output (for encoding). + int allocated_buf_size; ///< Size allocated for avpkt.data + + AVFrame frame; ///< Output frame (for decoding) or input (for encoding). 
+ int got_frame; ///< The output of got_picture_ptr from the last avcodec_decode_video() call. + int result; ///< The result of the last codec decode/encode() call. + + enum { + STATE_INPUT_READY, ///< Set when the thread is awaiting a packet. + STATE_SETTING_UP, ///< Set before the codec has called ff_thread_finish_setup(). + STATE_GET_BUFFER, /**< + * Set when the codec calls get_buffer(). + * State is returned to STATE_SETTING_UP afterwards. + */ + STATE_SETUP_FINISHED ///< Set after the codec has called ff_thread_finish_setup(). + } state; + + /** + * Array of frames passed to ff_thread_release_buffer(). + * Frames are released after all threads referencing them are finished. + */ + AVFrame released_buffers[MAX_BUFFERS]; + int num_released_buffers; + + /** + * Array of progress values used by ff_thread_get_buffer(). + */ + int progress[MAX_BUFFERS][2]; + uint8_t progress_used[MAX_BUFFERS]; + + AVFrame *requested_frame; ///< AVFrame the codec passed to get_buffer() +} PerThreadContext; + +/** + * Context stored in the client AVCodecContext thread_opaque. + */ +typedef struct FrameThreadContext { + PerThreadContext *threads; ///< The contexts for each thread. + PerThreadContext *prev_thread; ///< The last thread submit_packet() was called on. + + pthread_mutex_t buffer_mutex; ///< Mutex used to protect get/release_buffer(). + + int next_decoding; ///< The next context to submit a packet to. + int next_finished; ///< The next context to return output from. + + int delaying; /**< + * Set for the first N packets, where N is the number of threads. + * While it is set, ff_thread_en/decode_frame won't return any results. + */ + + int die; ///< Set when threads should exit. 
+} FrameThreadContext; + static void* attribute_align_arg worker(void *v) { AVCodecContext *avctx = v; @@ -84,7 +165,7 @@ static av_always_inline void avcodec_thread_park_workers(ThreadContext *c, int t pthread_mutex_unlock(&c->current_job_lock); } -void avcodec_thread_free(AVCodecContext *avctx) +static void thread_free(AVCodecContext *avctx) { ThreadContext *c = avctx->thread_opaque; int i; @@ -109,6 +190,9 @@ static int avcodec_thread_execute(AVCodecContext *avctx, action_func* func, void ThreadContext *c= avctx->thread_opaque; int dummy_ret; + if (!(avctx->active_thread_type&FF_THREAD_SLICE) || avctx->thread_count <= 1) + return avcodec_default_execute(avctx, func, arg, ret, job_count, job_size); + if (job_count <= 0) return 0; @@ -140,12 +224,11 @@ static int avcodec_thread_execute2(AVCodecContext *avctx, action_func2* func2, v return avcodec_thread_execute(avctx, NULL, arg, ret, job_count, 0); } -int avcodec_thread_init(AVCodecContext *avctx, int thread_count) +static int thread_init(AVCodecContext *avctx) { int i; ThreadContext *c; - - avctx->thread_count = thread_count; + int thread_count = avctx->thread_count; if (thread_count <= 1) return 0; @@ -184,3 +267,634 @@ int avcodec_thread_init(AVCodecContext *avctx, int thread_count) avctx->execute2 = avcodec_thread_execute2; return 0; } + +/** + * Codec worker thread. + * + * Automatically calls ff_thread_finish_setup() if the codec does + * not provide an update_thread_context method, or if the codec returns + * before calling it. 
+ */ +static attribute_align_arg void *frame_worker_thread(void *arg) +{ + PerThreadContext *p = arg; + FrameThreadContext *fctx = p->parent; + AVCodecContext *avctx = p->avctx; + AVCodec *codec = avctx->codec; + + while (1) { + if (p->state == STATE_INPUT_READY && !fctx->die) { + pthread_mutex_lock(&p->mutex); + while (p->state == STATE_INPUT_READY && !fctx->die) + pthread_cond_wait(&p->input_cond, &p->mutex); + pthread_mutex_unlock(&p->mutex); + } + + if (fctx->die) break; + + if (!codec->update_thread_context) ff_thread_finish_setup(avctx); + + pthread_mutex_lock(&p->mutex); + avcodec_get_frame_defaults(&p->frame); + p->got_frame = 0; + p->result = codec->decode(avctx, &p->frame, &p->got_frame, &p->avpkt); + + if (p->state == STATE_SETTING_UP) ff_thread_finish_setup(avctx); + + p->state = STATE_INPUT_READY; + + pthread_mutex_lock(&p->progress_mutex); + pthread_cond_signal(&p->output_cond); + pthread_mutex_unlock(&p->progress_mutex); + + pthread_mutex_unlock(&p->mutex); + } + + return NULL; +} + +/** + * Updates the next thread's AVCodecContext with values from the reference thread's context. + * + * @param dst The destination context. + * @param src The source context. 
+ * @param for_user 0 if the destination is a codec thread, 1 if the destination is the user's thread + */ +static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, int for_user) +{ + int err = 0; + + if (dst != src) { + dst->sub_id = src->sub_id; + dst->time_base = src->time_base; + dst->width = src->width; + dst->height = src->height; + dst->pix_fmt = src->pix_fmt; + + dst->has_b_frames = src->has_b_frames; + dst->idct_algo = src->idct_algo; + dst->slice_count = src->slice_count; + + dst->bits_per_coded_sample = src->bits_per_coded_sample; + dst->sample_aspect_ratio = src->sample_aspect_ratio; + dst->dtg_active_format = src->dtg_active_format; + + dst->profile = src->profile; + dst->level = src->level; + + dst->bits_per_raw_sample = src->bits_per_raw_sample; + dst->ticks_per_frame = src->ticks_per_frame; + dst->color_primaries = src->color_primaries; + + dst->color_trc = src->color_trc; + dst->colorspace = src->colorspace; + dst->color_range = src->color_range; + dst->chroma_sample_location = src->chroma_sample_location; + } + + if (for_user) { + dst->coded_frame = src->coded_frame; + dst->has_b_frames += src->thread_count - 1; + } else { + if (dst->codec->update_thread_context) + err = dst->codec->update_thread_context(dst, src); + } + + return err; +} + +/** + * Update the next thread's AVCodecContext with values set by the user. + * + * @param dst The destination context. + * @param src The source context. 
+ */ +static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src) +{ +#define copy_fields(s, e) memcpy(&dst->s, &src->s, (char*)&dst->e - (char*)&dst->s); + dst->flags = src->flags; + + dst->draw_horiz_band= src->draw_horiz_band; + dst->get_buffer = src->get_buffer; + dst->release_buffer = src->release_buffer; + + dst->opaque = src->opaque; + dst->hurry_up = src->hurry_up; + dst->dsp_mask = src->dsp_mask; + dst->debug = src->debug; + dst->debug_mv = src->debug_mv; + + dst->slice_flags = src->slice_flags; + dst->flags2 = src->flags2; + + copy_fields(skip_loop_filter, bidir_refine); + + dst->frame_number = src->frame_number; + dst->reordered_opaque = src->reordered_opaque; +#undef copy_fields +} + +static void free_progress(AVFrame *f) +{ + PerThreadContext *p = f->owner->thread_opaque; + int *progress = f->thread_opaque; + + p->progress_used[(progress - p->progress[0]) / 2] = 0; +} + +/// Releases the buffers that this decoding thread was the last user of. +static void release_delayed_buffers(PerThreadContext *p) +{ + FrameThreadContext *fctx = p->parent; + + while (p->num_released_buffers > 0) { + AVFrame *f = &p->released_buffers[--p->num_released_buffers]; + + pthread_mutex_lock(&fctx->buffer_mutex); + free_progress(f); + f->thread_opaque = NULL; + + f->owner->release_buffer(f->owner, f); + pthread_mutex_unlock(&fctx->buffer_mutex); + } +} + +static int submit_packet(PerThreadContext *p, AVPacket *avpkt) +{ + FrameThreadContext *fctx = p->parent; + PerThreadContext *prev_thread = fctx->prev_thread; + AVCodec *codec = p->avctx->codec; + uint8_t *buf = p->avpkt.data; + + if (!avpkt->size && !(codec->capabilities & CODEC_CAP_DELAY)) return 0; + + pthread_mutex_lock(&p->mutex); + + release_delayed_buffers(p); + + if (prev_thread) { + int err; + if (prev_thread->state == STATE_SETTING_UP) { + pthread_mutex_lock(&prev_thread->progress_mutex); + while (prev_thread->state == STATE_SETTING_UP) + pthread_cond_wait(&prev_thread->progress_cond, 
&prev_thread->progress_mutex); + pthread_mutex_unlock(&prev_thread->progress_mutex); + } + + err = update_context_from_thread(p->avctx, prev_thread->avctx, 0); + if (err) { + pthread_mutex_unlock(&p->mutex); + return err; + } + } + + av_fast_malloc(&buf, &p->allocated_buf_size, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE); + p->avpkt = *avpkt; + p->avpkt.data = buf; + memcpy(buf, avpkt->data, avpkt->size); + memset(buf + avpkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE); + + p->state = STATE_SETTING_UP; + pthread_cond_signal(&p->input_cond); + pthread_mutex_unlock(&p->mutex); + + /* + * If the client doesn't have a thread-safe get_buffer(), + * then decoding threads call back to the main thread, + * and it calls back to the client here. + */ + + if (!p->avctx->thread_safe_callbacks && + p->avctx->get_buffer != avcodec_default_get_buffer) { + while (p->state != STATE_SETUP_FINISHED && p->state != STATE_INPUT_READY) { + pthread_mutex_lock(&p->progress_mutex); + while (p->state == STATE_SETTING_UP) + pthread_cond_wait(&p->progress_cond, &p->progress_mutex); + + if (p->state == STATE_GET_BUFFER) { + p->result = p->avctx->get_buffer(p->avctx, p->requested_frame); + p->state = STATE_SETTING_UP; + pthread_cond_signal(&p->progress_cond); + } + pthread_mutex_unlock(&p->progress_mutex); + } + } + + fctx->prev_thread = p; + + return 0; +} + +int ff_thread_decode_frame(AVCodecContext *avctx, + AVFrame *picture, int *got_picture_ptr, + AVPacket *avpkt) +{ + FrameThreadContext *fctx = avctx->thread_opaque; + int finished = fctx->next_finished; + PerThreadContext *p; + int err; + + /* + * Submit a packet to the next decoding thread. + */ + + p = &fctx->threads[fctx->next_decoding]; + update_context_from_user(p->avctx, avctx); + err = submit_packet(p, avpkt); + if (err) return err; + + fctx->next_decoding++; + + /* + * If we're still receiving the initial packets, don't return a frame. 
+ */ + + if (fctx->delaying && avpkt->size) { + if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying = 0; + + *got_picture_ptr=0; + return 0; + } + + /* + * Return the next available frame from the oldest thread. + * If we're at the end of the stream, then we have to skip threads that + * didn't output a frame, because we don't want to accidentally signal + * EOF (avpkt->size == 0 && *got_picture_ptr == 0). + */ + + do { + p = &fctx->threads[finished++]; + + if (p->state != STATE_INPUT_READY) { + pthread_mutex_lock(&p->progress_mutex); + while (p->state != STATE_INPUT_READY) + pthread_cond_wait(&p->output_cond, &p->progress_mutex); + pthread_mutex_unlock(&p->progress_mutex); + } + + *picture = p->frame; + *got_picture_ptr = p->got_frame; + picture->pkt_dts = p->avpkt.dts; + + /* + * A later call with avpkt->size == 0 may loop over all threads, + * including this one, searching for a frame to return before being + * stopped by the "finished != fctx->next_finished" condition. + * Make sure we don't mistakenly return the same frame again. 
+ */ + p->got_frame = 0; + + if (finished >= avctx->thread_count) finished = 0; + } while (!avpkt->size && !*got_picture_ptr && finished != fctx->next_finished); + + update_context_from_thread(avctx, p->avctx, 1); + + if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0; + + fctx->next_finished = finished; + + return p->result; +} + +void ff_thread_report_progress(AVFrame *f, int n, int field) +{ + PerThreadContext *p; + int *progress = f->thread_opaque; + + if (!progress || progress[field] >= n) return; + + p = f->owner->thread_opaque; + + if (f->owner->debug&FF_DEBUG_THREADS) + av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field %d\n", progress, n, field); + + pthread_mutex_lock(&p->progress_mutex); + progress[field] = n; + pthread_cond_broadcast(&p->progress_cond); + pthread_mutex_unlock(&p->progress_mutex); +} + +void ff_thread_await_progress(AVFrame *f, int n, int field) +{ + PerThreadContext *p; + int *progress = f->thread_opaque; + + if (!progress || progress[field] >= n) return; + + p = f->owner->thread_opaque; + + if (f->owner->debug&FF_DEBUG_THREADS) + av_log(f->owner, AV_LOG_DEBUG, "thread awaiting %d field %d from %p\n", n, field, progress); + + pthread_mutex_lock(&p->progress_mutex); + while (progress[field] < n) + pthread_cond_wait(&p->progress_cond, &p->progress_mutex); + pthread_mutex_unlock(&p->progress_mutex); +} + +void ff_thread_finish_setup(AVCodecContext *avctx) { + PerThreadContext *p = avctx->thread_opaque; + + if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; + + pthread_mutex_lock(&p->progress_mutex); + p->state = STATE_SETUP_FINISHED; + pthread_cond_broadcast(&p->progress_cond); + pthread_mutex_unlock(&p->progress_mutex); +} + +/// Waits for all threads to finish. 
+static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count) +{ + int i; + + for (i = 0; i < thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + + if (p->state != STATE_INPUT_READY) { + pthread_mutex_lock(&p->progress_mutex); + while (p->state != STATE_INPUT_READY) + pthread_cond_wait(&p->output_cond, &p->progress_mutex); + pthread_mutex_unlock(&p->progress_mutex); + } + } +} + +static void frame_thread_free(AVCodecContext *avctx, int thread_count) +{ + FrameThreadContext *fctx = avctx->thread_opaque; + AVCodec *codec = avctx->codec; + int i; + + park_frame_worker_threads(fctx, thread_count); + + if (fctx->prev_thread) + update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0); + + fctx->die = 1; + + for (i = 0; i < thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + + pthread_mutex_lock(&p->mutex); + pthread_cond_signal(&p->input_cond); + pthread_mutex_unlock(&p->mutex); + + pthread_join(p->thread, NULL); + + if (codec->close) + codec->close(p->avctx); + + avctx->codec = NULL; + + release_delayed_buffers(p); + } + + for (i = 0; i < thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + + avcodec_default_free_buffers(p->avctx); + + pthread_mutex_destroy(&p->mutex); + pthread_mutex_destroy(&p->progress_mutex); + pthread_cond_destroy(&p->input_cond); + pthread_cond_destroy(&p->progress_cond); + pthread_cond_destroy(&p->output_cond); + av_freep(&p->avpkt.data); + + if (i) + av_freep(&p->avctx->priv_data); + + av_freep(&p->avctx); + } + + av_freep(&fctx->threads); + pthread_mutex_destroy(&fctx->buffer_mutex); + av_freep(&avctx->thread_opaque); +} + +static int frame_thread_init(AVCodecContext *avctx) +{ + int thread_count = avctx->thread_count; + AVCodec *codec = avctx->codec; + AVCodecContext *src = avctx; + FrameThreadContext *fctx; + int i, err = 0; + + avctx->thread_opaque = fctx = av_mallocz(sizeof(FrameThreadContext)); + + fctx->threads = av_mallocz(sizeof(PerThreadContext) * 
thread_count); + pthread_mutex_init(&fctx->buffer_mutex, NULL); + fctx->delaying = 1; + + for (i = 0; i < thread_count; i++) { + AVCodecContext *copy = av_malloc(sizeof(AVCodecContext)); + PerThreadContext *p = &fctx->threads[i]; + + pthread_mutex_init(&p->mutex, NULL); + pthread_mutex_init(&p->progress_mutex, NULL); + pthread_cond_init(&p->input_cond, NULL); + pthread_cond_init(&p->progress_cond, NULL); + pthread_cond_init(&p->output_cond, NULL); + + p->parent = fctx; + p->avctx = copy; + + *copy = *src; + copy->thread_opaque = p; + copy->pkt = &p->avpkt; + + if (!i) { + src = copy; + + if (codec->init) + err = codec->init(copy); + + update_context_from_thread(avctx, copy, 1); + } else { + copy->is_copy = 1; + copy->priv_data = av_malloc(codec->priv_data_size); + memcpy(copy->priv_data, src->priv_data, codec->priv_data_size); + + if (codec->init_thread_copy) + err = codec->init_thread_copy(copy); + } + + if (err) goto error; + + pthread_create(&p->thread, NULL, frame_worker_thread, p); + } + + return 0; + +error: + frame_thread_free(avctx, i+1); + + return err; +} + +void ff_thread_flush(AVCodecContext *avctx) +{ + FrameThreadContext *fctx = avctx->thread_opaque; + + if (!avctx->thread_opaque) return; + + park_frame_worker_threads(fctx, avctx->thread_count); + + if (fctx->prev_thread) + update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0); + + fctx->next_decoding = fctx->next_finished = 0; + fctx->delaying = 1; + fctx->prev_thread = NULL; +} + +static int *allocate_progress(PerThreadContext *p) +{ + int i; + + for (i = 0; i < MAX_BUFFERS; i++) + if (!p->progress_used[i]) break; + + if (i == MAX_BUFFERS) { + av_log(p->avctx, AV_LOG_ERROR, "allocate_progress() overflow\n"); + return NULL; + } + + p->progress_used[i] = 1; + + return p->progress[i]; +} + +int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f) +{ + PerThreadContext *p = avctx->thread_opaque; + int *progress, err; + + f->owner = avctx; + + if 
(!(avctx->active_thread_type&FF_THREAD_FRAME)) { + f->thread_opaque = NULL; + return avctx->get_buffer(avctx, f); + } + + if (p->state != STATE_SETTING_UP) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() cannot be called after ff_thread_finish_setup()\n"); + return -1; + } + + pthread_mutex_lock(&p->parent->buffer_mutex); + f->thread_opaque = progress = allocate_progress(p); + + if (!progress) { + pthread_mutex_unlock(&p->parent->buffer_mutex); + return -1; + } + + progress[0] = + progress[1] = -1; + + if (avctx->thread_safe_callbacks || + avctx->get_buffer == avcodec_default_get_buffer) { + err = avctx->get_buffer(avctx, f); + } else { + p->requested_frame = f; + p->state = STATE_GET_BUFFER; + pthread_mutex_lock(&p->progress_mutex); + pthread_cond_signal(&p->progress_cond); + + while (p->state != STATE_SETTING_UP) + pthread_cond_wait(&p->progress_cond, &p->progress_mutex); + + err = p->result; + + pthread_mutex_unlock(&p->progress_mutex); + } + + pthread_mutex_unlock(&p->parent->buffer_mutex); + + /* + * Buffer age is difficult to keep track of between + * multiple threads, and the optimizations it allows + * are not worth the effort. It is disabled for now. + */ + f->age = INT_MAX; + + return err; +} + +void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f) +{ + PerThreadContext *p = avctx->thread_opaque; + + if (!(avctx->active_thread_type&FF_THREAD_FRAME)) { + avctx->release_buffer(avctx, f); + return; + } + + if (p->num_released_buffers >= MAX_BUFFERS) { + av_log(p->avctx, AV_LOG_ERROR, "too many thread_release_buffer calls!\n"); + return; + } + + if(avctx->debug & FF_DEBUG_BUFFERS) + av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic %p, %d buffers used\n", + f, f->owner->internal_buffer_count); + + p->released_buffers[p->num_released_buffers++] = *f; + memset(f->data, 0, sizeof(f->data)); +} + +/** + * Set the threading algorithms used. + * + * Threading requires more than one thread. 
+ * Frame threading requires entire frames to be passed to the codec, + * and introduces extra decoding delay, so is incompatible with low_delay. + * + * @param avctx The context. + */ +static void validate_thread_parameters(AVCodecContext *avctx) +{ + int frame_threading_supported = (avctx->codec->capabilities & CODEC_CAP_FRAME_THREADS) + && !(avctx->flags & CODEC_FLAG_TRUNCATED) + && !(avctx->flags & CODEC_FLAG_LOW_DELAY) + && !(avctx->flags2 & CODEC_FLAG2_CHUNKS); + if (avctx->thread_count == 1) { + avctx->active_thread_type = 0; + } else if (frame_threading_supported && (avctx->thread_type & FF_THREAD_FRAME)) { + avctx->active_thread_type = FF_THREAD_FRAME; + } else { + avctx->active_thread_type = FF_THREAD_SLICE; + } +} + +int avcodec_thread_init(AVCodecContext *avctx, int thread_count) +{ + if (avctx->thread_opaque) { + av_log(avctx, AV_LOG_ERROR, "avcodec_thread_init is ignored after avcodec_open\n"); + return -1; + } + + avctx->thread_count = FFMAX(1, thread_count); + + if (avctx->codec) { + validate_thread_parameters(avctx); + + if (avctx->active_thread_type&FF_THREAD_SLICE) + return thread_init(avctx); + else if (avctx->active_thread_type&FF_THREAD_FRAME) + return frame_thread_init(avctx); + } + + return 0; +} + +void avcodec_thread_free(AVCodecContext *avctx) +{ + if (avctx->active_thread_type&FF_THREAD_FRAME) + frame_thread_free(avctx, avctx->thread_count); + else + thread_free(avctx); +} diff --git a/libavcodec/thread.h b/libavcodec/thread.h new file mode 100644 index 0000000000..d9186d6850 --- /dev/null +++ b/libavcodec/thread.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2008 Alexander Strange <astrange@ithinksw.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Multithreading support functions + * @author Alexander Strange <astrange@ithinksw.com> + */ + +#ifndef AVCODEC_THREAD_H +#define AVCODEC_THREAD_H + +#include "config.h" +#include "avcodec.h" + +/** + * Waits for decoding threads to finish and resets internal + * state. Called by avcodec_flush_buffers(). + * + * @param avctx The context. + */ +void ff_thread_flush(AVCodecContext *avctx); + +/** + * Submits a new frame to a decoding thread. + * Returns the next available frame in picture. *got_picture_ptr + * will be 0 if none is available. + * + * Parameters are the same as avcodec_decode_video2(). + */ +int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture, + int *got_picture_ptr, AVPacket *avpkt); + +/** + * If the codec defines update_thread_context(), call this + * when it is ready for the next thread to start decoding + * the next frame. After calling it, do not change any variables + * read by the update_thread_context() method, or call ff_thread_get_buffer(). + * + * @param avctx The context. + */ +void ff_thread_finish_setup(AVCodecContext *avctx); + +/** + * Notifies later decoding threads when part of their reference picture + * is ready. + * Call this when some part of the picture is finished decoding. + * Later calls with lower values of progress have no effect. + * + * @param f The picture being decoded. + * @param progress Value, in arbitrary units, of how much of the picture has been decoded. 
+ * @param field The field being decoded, for field-picture codecs. + * 0 for top field or frame pictures, 1 for bottom field. + */ +void ff_thread_report_progress(AVFrame *f, int progress, int field); + +/** + * Waits for earlier decoding threads to finish reference pictures. + * Call this before accessing some part of a picture, with a given + * value for progress, and it will return after the responsible decoding + * thread calls ff_thread_report_progress() with the same or + * higher value for progress. + * + * @param f The picture being referenced. + * @param progress Value, in arbitrary units, to wait for. + * @param field The field being referenced, for field-picture codecs. + * 0 for top field or frame pictures, 1 for bottom field. + */ +void ff_thread_await_progress(AVFrame *f, int progress, int field); + +/** + * Wrapper around get_buffer() for frame-multithreaded codecs. + * Call this function instead of avctx->get_buffer(f). + * Cannot be called after the codec has called ff_thread_finish_setup(). + * + * @param avctx The current context. + * @param f The frame to write into. + */ +int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f); + +/** + * Wrapper around release_buffer() for frame-multithreaded codecs. + * Call this function instead of avctx->release_buffer(f). + * The AVFrame will be copied and the actual release_buffer() call + * will be performed later. The contents of data pointed to by the + * AVFrame should not be changed until ff_thread_get_buffer() is called + * on it. + * + * @param avctx The current context. + * @param f The picture being released. 
+ */ +void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f); + +#endif /* AVCODEC_THREAD_H */ diff --git a/libavcodec/utils.c b/libavcodec/utils.c index e9db33e9d7..529369bd2a 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -37,6 +37,7 @@ #include "dsputil.h" #include "libavutil/opt.h" #include "imgconvert.h" +#include "thread.h" #include "audioconvert.h" #include "internal.h" #include <stdlib.h> @@ -261,6 +262,11 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ (*picture_number)++; if(buf->base[0] && (buf->width != w || buf->height != h || buf->pix_fmt != s->pix_fmt)){ + if(s->active_thread_type&FF_THREAD_FRAME) { + av_log_missing_feature(s, "Width/height changing with frame threads is", 0); + return -1; + } + for(i=0; i<4; i++){ av_freep(&buf->base[i]); buf->data[i]= NULL; @@ -532,13 +538,21 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec) goto free_and_end; } avctx->frame_number = 0; + + if (HAVE_THREADS && !avctx->thread_opaque) { + ret = avcodec_thread_init(avctx, avctx->thread_count); + if (ret < 0) { + goto free_and_end; + } + } + if (avctx->codec->max_lowres < avctx->lowres) { av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n", avctx->codec->max_lowres); goto free_and_end; } - if(avctx->codec->init){ + if(avctx->codec->init && !(avctx->active_thread_type&FF_THREAD_FRAME)){ ret = avctx->codec->init(avctx); if (ret < 0) { goto free_and_end; @@ -636,14 +650,18 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi avctx->pkt = avpkt; - if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size){ - ret = avctx->codec->decode(avctx, picture, got_picture_ptr, - avpkt); + if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type&FF_THREAD_FRAME)){ + if (HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) + ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr, + 
avpkt); + else { + ret = avctx->codec->decode(avctx, picture, got_picture_ptr, + avpkt); + picture->pkt_dts= avpkt->dts; + } emms_c(); //needed to avoid an emms_c() call before every return; - picture->pkt_dts= avpkt->dts; - if (*got_picture_ptr) avctx->frame_number++; }else @@ -768,6 +786,7 @@ av_cold int avcodec_close(AVCodecContext *avctx) if(avctx->codec && avctx->codec->encode) av_freep(&avctx->extradata); avctx->codec = NULL; + avctx->active_thread_type = 0; entangled_thread_counter--; /* Release any user-supplied mutex. */ @@ -1029,6 +1048,8 @@ void avcodec_init(void) void avcodec_flush_buffers(AVCodecContext *avctx) { + if(HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) + ff_thread_flush(avctx); if(avctx->codec->flush) avctx->codec->flush(avctx); } @@ -1229,3 +1250,30 @@ unsigned int ff_toupper4(unsigned int x) + (toupper((x>>16)&0xFF)<<16) + (toupper((x>>24)&0xFF)<<24); } + +#if !HAVE_PTHREADS + +int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f) +{ + f->owner = avctx; + return avctx->get_buffer(avctx, f); +} + +void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f) +{ + f->owner->release_buffer(f->owner, f); +} + +void ff_thread_finish_setup(AVCodecContext *avctx) +{ +} + +void ff_thread_report_progress(AVFrame *f, int progress, int field) +{ +} + +void ff_thread_await_progress(AVFrame *f, int progress, int field) +{ +} + +#endif diff --git a/libavcodec/w32thread.c b/libavcodec/w32thread.c index f7a1430647..007508e409 100644 --- a/libavcodec/w32thread.c +++ b/libavcodec/w32thread.c @@ -129,7 +129,13 @@ int avcodec_thread_init(AVCodecContext *s, int thread_count){ ThreadContext *c; uint32_t threadid; + if(!(s->thread_type & FF_THREAD_SLICE)){ + av_log(s, AV_LOG_WARNING, "The requested thread algorithm is not supported with this thread library.\n"); + return 0; + } + s->thread_count= thread_count; + s->active_thread_type= FF_THREAD_SLICE; if (thread_count <= 1) return 0; diff --git a/libavformat/utils.c b/libavformat/utils.c 
index 8c3311f3bc..c21b922401 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -930,6 +930,12 @@ static void compute_pkt_fields(AVFormatContext *s, AVStream *st, /* do we have a video B-frame ? */ delay= st->codec->has_b_frames; presentation_delayed = 0; + + // ignore delay caused by frame threading so that the mpeg2-without-dts + // warning will not trigger + if (delay && st->codec->active_thread_type&FF_THREAD_FRAME) + delay -= st->codec->thread_count-1; + /* XXX: need has_b_frame, but cannot get it if the codec is not initialized */ if (delay && diff --git a/libavutil/internal.h b/libavutil/internal.h index 36d3e719f6..4c98a1299f 100644 --- a/libavutil/internal.h +++ b/libavutil/internal.h @@ -210,4 +210,15 @@ type ff_##name args #endif +/** + * Returns NULL if a threading library has not been enabled. + * Used to disable threading functions in AVCodec definitions + * when not needed. + */ +#if HAVE_THREADS +# define ONLY_IF_THREADS_ENABLED(x) x +#else +# define ONLY_IF_THREADS_ENABLED(x) NULL +#endif + #endif /* AVUTIL_INTERNAL_H */ |