diff options
author | Ben Avison <bavison@riscosopen.org> | 2013-08-05 13:12:47 +0100 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2013-08-08 12:08:30 +0300 |
commit | 218d6844b37d339ffbf2044ad07d8be7767e2734 (patch) | |
tree | bc01481714ca5d7550ecb00d454a4c220cf1e959 | |
parent | 7a82022ee2f9b1fad991ace0936901e7419444be (diff) | |
download | ffmpeg-218d6844b37d339ffbf2044ad07d8be7767e2734.tar.gz |
h264dsp: Factorize code into a new function, h264_find_start_code_candidate
This performs the start code search which was previously part of
h264_find_frame_end() - the most CPU intensive part of the function.
By itself, this results in a performance regression:
Before After
Mean StdDev Mean StdDev Change
Overall time 2925.6 26.2 3068.5 31.7 -4.7%
but this can more than be made up for by platform-optimised
implementations of the function.
Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- | libavcodec/h264_parser.c | 27 | ||||
-rw-r--r-- | libavcodec/h264dsp.c | 29 | ||||
-rw-r--r-- | libavcodec/h264dsp.h | 9 |
3 files changed, 41 insertions, 24 deletions
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c index da2a5f99db..ef5da98934 100644 --- a/libavcodec/h264_parser.c +++ b/libavcodec/h264_parser.c @@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf, for (i = 0; i < buf_size; i++) { if (state == 7) { -#if HAVE_FAST_UNALIGNED - /* we check i < buf_size instead of i + 3 / 7 because it is - * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE - * bytes at the end. - */ -#if HAVE_FAST_64BIT - while (i < buf_size && - !((~*(const uint64_t *)(buf + i) & - (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & - 0x8080808080808080ULL)) - i += 8; -#else - while (i < buf_size && - !((~*(const uint32_t *)(buf + i) & - (*(const uint32_t *)(buf + i) - 0x01010101U)) & - 0x80808080U)) - i += 4; -#endif -#endif - for (; i < buf_size; i++) - if (!buf[i]) { - state = 2; - break; - } + i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i); + if (i < buf_size) + state = 2; } else if (state <= 2) { if (buf[i] == 1) state ^= 5; // 2->7, 1->4, 0->5 diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c index 3ca6abefda..a901dbb9e1 100644 --- a/libavcodec/h264dsp.c +++ b/libavcodec/h264dsp.c @@ -53,6 +53,34 @@ #include "h264addpx_template.c" #undef BIT_DEPTH +static int h264_find_start_code_candidate_c(const uint8_t *buf, int size) +{ + int i = 0; +#if HAVE_FAST_UNALIGNED + /* we check i < size instead of i + 3 / 7 because it is + * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE + * bytes at the end. + */ +#if HAVE_FAST_64BIT + while (i < size && + !((~*(const uint64_t *)(buf + i) & + (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & + 0x8080808080808080ULL)) + i += 8; +#else + while (i < size && + !((~*(const uint32_t *)(buf + i) & + (*(const uint32_t *)(buf + i) - 0x01010101U)) & + 0x80808080U)) + i += 4; +#endif +#endif + for (; i < size; i++) + if (!buf[i]) + break; + return i; +} + av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { @@ -133,6 +161,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, H264_DSP(8); break; } + c->h264_find_start_code_candidate = h264_find_start_code_candidate_c; if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); if (ARCH_PPC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 1f9f8fe823..6249ba70d5 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -105,6 +105,15 @@ typedef struct H264DSPContext { /* bypass-transform */ void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride); void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride); + + /** + * Search buf from the start for up to size bytes. Return the index + * of a zero byte, or >= size if not found. Ideally, use lookahead + * to filter out any zero bytes that are known to not be followed by + * one or more further zero bytes and a one byte. Better still, filter + * out any bytes that form the trailing_zero_8bits syntax element too. + */ + int (*h264_find_start_code_candidate)(const uint8_t *buf, int size); } H264DSPContext; void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, |