From f6573cb75a1c6d57c35f22a8b5d8ce134a9d3d86 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Mon, 18 Mar 2019 23:52:11 +0100 Subject: i965_encoder: Speed up i965_MapBuffer for JPEG encoding somewhat. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Searching for the EOI marker byte-by-byte turned out to be notable in profiles when encoding large amounts of JPEGs (~15% of a core at 480 fps of 1080p images). Using memmem() will typically give us an AVX-optimized version of at least finding the 0xFF character, which is much more efficient. It seems to speed up this part by about 3–4x in practice, taking it largely off the profiles. Signed-off-by: Steinar H. Gunderson --- src/i965_drv_video.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index e1b688ae..4aa8da7b 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -3116,12 +3116,12 @@ i965_MapBuffer(VADriverContextP ctx, } if (coded_buffer_segment->codec == CODEC_JPEG) { - for (i = 0; i < obj_buffer->size_element - header_offset - 1 - 0x1000; i++) { - if ((buffer[i] == 0xFF) && (buffer[i + 1] == 0xD9)) { - break; - } - } - coded_buffer_segment->base.size = i + 2; + int len = obj_buffer->size_element - header_offset - 1 - 0x1000; + unsigned char *end_of_file_marker = memmem(buffer, len, "\xff\xd9", 2); + if (end_of_file_marker == NULL) + coded_buffer_segment->base.size = len + 2; + else + coded_buffer_segment->base.size = (end_of_file_marker - buffer) + 2; } else if (coded_buffer_segment->codec != CODEC_VP8) { /* vp8 coded buffer size can be told by vp8 internal statistics buffer, so it don't need to traversal the coded buffer */ -- cgit v1.2.1