diff options
Diffstat (limited to 'mt-work')
-rw-r--r-- | mt-work/email.sh | 6 | ||||
-rw-r--r-- | mt-work/mplayer.diff | 13 | ||||
-rw-r--r-- | mt-work/raw.sh | 10 | ||||
-rw-r--r-- | mt-work/test.sh | 13 | ||||
-rw-r--r-- | mt-work/todo.txt | 95 | ||||
-rw-r--r-- | mt-work/valgrind-check.sh | 5 | ||||
-rw-r--r-- | mt-work/yuvcmp.c | 182 |
7 files changed, 324 insertions, 0 deletions
diff --git a/mt-work/email.sh b/mt-work/email.sh new file mode 100644 index 0000000000..e5cdb72338 --- /dev/null +++ b/mt-work/email.sh @@ -0,0 +1,6 @@ +#!/bin/sh -v + +# args [where to put patches] [smtp server] [destination] + +git format-patch -o "$1" --inline --subject-prefix=soc --thread origin +git send-email --no-chain-reply-to --smtp-server $2 --to $3 --dry-run "$1" diff --git a/mt-work/mplayer.diff b/mt-work/mplayer.diff new file mode 100644 index 0000000000..170b517b3c --- /dev/null +++ b/mt-work/mplayer.diff @@ -0,0 +1,13 @@ +diff --git a/libmpcodecs/vd_ffmpeg.c b/libmpcodecs/vd_ffmpeg.c +index 7c68a20..135e6b1 100644 +--- a/libmpcodecs/vd_ffmpeg.c ++++ b/libmpcodecs/vd_ffmpeg.c +@@ -280,7 +280,7 @@ static int init(sh_video_t *sh){ + return 0; + } + +- if(vd_use_slices && (lavc_codec->capabilities&CODEC_CAP_DRAW_HORIZ_BAND) && !do_vis_debug) ++ if(vd_use_slices && (lavc_codec->capabilities&CODEC_CAP_DRAW_HORIZ_BAND) && !do_vis_debug && lavc_param_threads <= 1) + ctx->do_slices=1; + + if(lavc_codec->capabilities&CODEC_CAP_DR1 && !do_vis_debug && lavc_codec->id != AV_CODEC_ID_H264 && lavc_codec->id != AV_CODEC_ID_INTERPLAY_VIDEO && lavc_codec->id != AV_CODEC_ID_ROQ && lavc_codec->id != AV_CODEC_ID_VP8 && lavc_codec->id != AV_CODEC_ID_LAGARITH) diff --git a/mt-work/raw.sh b/mt-work/raw.sh new file mode 100644 index 0000000000..0ced88e213 --- /dev/null +++ b/mt-work/raw.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +fn=`basename "$1"` +for th in 1 4; do + time ./ffmpeg_g -threads $th -skip_loop_filter all -vsync 0 -y -t 30 -i "$1" -an -f rawvideo "raw/n-$fn-$th.yuv" +done + +#for th in 1 4; do +# time ./ffmpeg_g -threads $th -vsync 0 -y -t 30 -i "$1" -an -f rawvideo "raw/$fn-$th.yuv" +#done diff --git a/mt-work/test.sh b/mt-work/test.sh new file mode 100644 index 0000000000..a88a35bfe6 --- /dev/null +++ b/mt-work/test.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +fn=`basename "$1"` +for th in 1 2 3 4; do + time ./ffmpeg_g -threads $th -flags2 +fast -vsync 0 -y -t 30 -i "$1" -an 
-f framecrc "crc/$fn-$th.txt" >/dev/null 2>&1 +done + +./ffmpeg_g -threads 1 -y -t 10 -i "$1" -an -f framecrc "crc/$fn-1-vsync.txt" >/dev/null 2>&1 +./ffmpeg_g -threads 3 -y -t 10 -i "$1" -an -f framecrc "crc/$fn-3-vsync.txt" >/dev/null 2>&1 + +md5 "crc/$fn-"[1234].txt +echo +md5 "crc/$fn-"*vsync.txt diff --git a/mt-work/todo.txt b/mt-work/todo.txt new file mode 100644 index 0000000000..f64514f6fa --- /dev/null +++ b/mt-work/todo.txt @@ -0,0 +1,95 @@ +Todo + +-- For other people +- Multithread vc1. +- Multithread an intra codec like mjpeg (trivial). +- Fix mpeg1 (see below). +- Try the first three items under Optimization. +- Fix h264 (see below). +- Try mpeg4 (see below). + +-- Bug fixes + +General critical: +- Error resilience has to run before ff_report_frame_progress() +is called. Otherwise there will be race conditions. (This might already +work.) In general testing error paths should be done more. +- 'make fate THREADS=2' doesn't pass. Most failures are due to +bugs in vsync in ffmpeg.c, which are currently obscuring real failures. + +h264: +- Files that aren't parsed (e.g. mp4) and contain PAFF with two +field pictures in the same packet are not optimal. Modify the +nals_needed check so that the second field's first slice is +considered as needed, then uncomment the FIXME code in decode_postinit. +Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4 + +mpeg4: +- Packed B-frames need to be explicitly split up +when frame threading is on. It's not very fast +without this. +- The buffer age optimization is disabled due to +the way buffers are allocated across threads. The +branch 'fix_buffer_age' has an attempt to fix it +which breaks ffplay. +- Support interlaced. + +mpeg1/2: +- Seeking always prints "first frame not a keyframe" +with threads on. Currently disabled for this reason. + +-- Prove correct + +- decode_update_progress() in h264.c +race_checking branch has some work on h264, +but not that function. 
It might be worth putting +the branch under #ifdef DEBUG in mainline, but +the code would have to be cleaner. +- MPV_lowest_referenced_row() and co in mpegvideo.c +- Same in vp3. + +-- Optimization + +- Merge h264 decode_update_progress() with loop_filter(). +Add CODEC_CAP_DRAW_HORIZ_BAND as a side effect. +- EMU_EDGE is always set for h264 PAFF+MT +because draw_edges() writes into the other field's +thread's pixels. Needs an option to skip T/B fields. +- Check update_thread_context() functions and make +sure they only copy what they need to. +- Try some more optimization of the "ref < 48; ref++" +loop in h264.c await_references(), try turning the list0/list1 check +above into a loop without being slower. +- Support frame+slice threading at the same time +by assigning slice_count threads for frame threads +to use with execute(). This is simpler but unbalanced +if only one frame thread uses any. + +-- Features + +- Support streams with width/height changing. This +requires flushing all current frames (and buffering +the input in the meantime), closing the codec and +reopening it. Or don't support it. +- Support encoding. Might need more threading primitives +for good ratecontrol; would be nice for audio and libavfilter too. +- Async decoding part 1: instead of trying to +start every thread at the beginning, return a picture +if the earliest thread is already done, but don't wait +for it. Not sure what effect this would have. +- Part 2: have an API that doesn't wait for the decoding +thread, only returns EAGAIN if it's not ready. What will +it do with the next input packet if it returns that? +- Have an API that returns finished pictures but doesn't +require sending new ones. Maybe allow NULL avpkt when +not at the end of the stream. + +-- Samples + +http://astrange.ithinksw.net/ffmpeg/mt-samples/ + +See yuvcmp.c in this directory to compare decoded samples. 
+ +For debugging, try commenting out ff_thread_finish_setup calls so +that only one thread runs at once, and then binary search+ +scatter printfs to look for differences in codec contexts. diff --git a/mt-work/valgrind-check.sh b/mt-work/valgrind-check.sh new file mode 100644 index 0000000000..276327a76a --- /dev/null +++ b/mt-work/valgrind-check.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 1 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null + +valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null diff --git a/mt-work/yuvcmp.c b/mt-work/yuvcmp.c new file mode 100644 index 0000000000..11585f9b4c --- /dev/null +++ b/mt-work/yuvcmp.c @@ -0,0 +1,182 @@ +/* + * originally by Andreas Öman (andoma) + * some changes by Alexander Strange + */ + +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <stdio.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + + +int +main(int argc, char **argv) +{ + int fd[2]; + int print_pixels = 0; + int dump_blocks = 0; + + int width; + int height; + int to_skip = 0; + + if (argc < 6) { + fprintf(stderr, "%s [YUV file 1] [YUV file 2] width height pixelcmp|blockdump (# to skip)\n", argv[0]); + return 1; + } + + width = atoi(argv[3]); + height = atoi(argv[4]); + if (argc > 6) + to_skip = atoi(argv[6]); + + uint8_t *Y[2], *C[2][2]; + int i, v, c, p; + int lsiz = width * height; + int csiz = width * height / 4; + int x, y; + int cwidth = width / 2; + int fr = to_skip; + int mb; + char *mberrors; + int mb_x, mb_y; + uint8_t *a; + uint8_t *b; + int die = 0; + + print_pixels = strstr(argv[5], "pixelcmp") ? 1 : 0; + dump_blocks = strstr(argv[5], "blockdump") ? 
1 : 0; + + for(i = 0; i < 2; i++) { + Y[i] = malloc(lsiz); + C[0][i] = malloc(csiz); + C[1][i] = malloc(csiz); + + fd[i] = open(argv[1 + i], O_RDONLY); + if(fd[i] == -1) { + perror("open"); + exit(1); + } + fcntl(fd[i], F_NOCACHE, 1); + + if (to_skip) + lseek(fd[i], to_skip * (lsiz + 2*csiz), SEEK_SET); + } + + mb_x = width / 16; + mb_y = height / 16; + + mberrors = malloc(mb_x * mb_y); + + while(!die) { + memset(mberrors, 0, mb_x * mb_y); + + printf("Loading frame %d\n", ++fr); + + for(i = 0; i < 2; i++) { + v = read(fd[i], Y[i], lsiz); + if(v != lsiz) { + fprintf(stderr, "Unable to read Y from file %d, exiting\n", i + 1); + return 1; + } + } + + + for(c = 0; c < lsiz; c++) { + if(Y[0][c] != Y[1][c]) { + x = c % width; + y = c / width; + + mb = x / 16 + (y / 16) * mb_x; + + if(print_pixels) + printf("Luma diff 0x%02x != 0x%02x at pixel (%4d,%-4d) mb(%d,%d) #%d\n", + Y[0][c], + Y[1][c], + x, y, + x / 16, + y / 16, + mb); + + mberrors[mb] |= 1; + } + } + + /* Chroma planes */ + + for(p = 0; p < 2; p++) { + + for(i = 0; i < 2; i++) { + v = read(fd[i], C[p][i], csiz); + if(v != csiz) { + fprintf(stderr, "Unable to read %c from file %d, exiting\n", + "UV"[p], i + 1); + return 1; + } + } + + for(c = 0; c < csiz; c++) { + if(C[p][0][c] != C[p][1][c]) { + x = c % cwidth; + y = c / cwidth; + + mb = x / 8 + (y / 8) * mb_x; + + mberrors[mb] |= 2 << p; + + if(print_pixels) + + printf("c%c diff 0x%02x != 0x%02x at pixel (%4d,%-4d) " + "mb(%3d,%-3d) #%d\n", + p ? 'r' : 'b', + C[p][0][c], + C[p][1][c], + + x, y, + x / 8, + y / 8, + x / 8 + y / 8 * cwidth / 8); + } + } + } + + for(i = 0; i < mb_x * mb_y; i++) { + x = i % mb_x; + y = i / mb_x; + + if(mberrors[i]) { + die = 1; + + printf("MB (%3d,%-3d) %4d %d %c%c%c damaged\n", + x, y, i, mberrors[i], + mberrors[i] & 1 ? 'Y' : ' ', + mberrors[i] & 2 ? 'U' : ' ', + mberrors[i] & 4 ? 
'V' : ' '); + + if(dump_blocks) { + a = Y[0] + x * 16 + y * 16 * width; + b = Y[1] + x * 16 + y * 16 * width; + + for(y = 0; y < 16; y++) { + printf("%c ", "TB"[y&1]); + for(x = 0; x < 16; x++) + printf("%02x%c", a[x + y * width], + a[x + y * width] != b[x + y * width] ? '<' : ' '); + + printf("| "); + for(x = 0; x < 16; x++) + printf("%02x%c", b[x + y * width], + a[x + y * width] != b[x + y * width] ? '<' : ' '); + + printf("\n"); + } + } + } + } + } + + return 0; +} |