diff options
author | Seungha Yang <seungha@centricular.com> | 2021-08-05 19:59:38 +0900 |
---|---|---|
committer | GStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org> | 2021-08-24 11:09:28 +0000 |
commit | c654f86859d03bb91a278464c3410244d80d60aa (patch) | |
tree | fa6080b279cfe00aa0d28b82b1f6887080f585c6 | |
parent | f5a79ce05f62ad98134435955ed3d10d22f17cb9 (diff) | |
download | gstreamer-plugins-base-c654f86859d03bb91a278464c3410244d80d60aa.tar.gz |
video-converter: Add support for A420 to RGB fast path
Add fast path for A420 -> RGB format conversion
Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/-/merge_requests/1245>
-rw-r--r-- | gst-libs/gst/video/video-converter.c | 191 |
-rw-r--r-- | gst-libs/gst/video/video-orc-dist.c | 922 |
-rw-r--r-- | gst-libs/gst/video/video-orc-dist.h | 7 |
-rw-r--r-- | gst-libs/gst/video/video-orc.orc | 112 |
4 files changed, 1228 insertions, 4 deletions
diff --git a/gst-libs/gst/video/video-converter.c b/gst-libs/gst/video/video-converter.c index 990168eef..7d5c2297d 100644 --- a/gst-libs/gst/video/video-converter.c +++ b/gst-libs/gst/video/video-converter.c @@ -5919,6 +5919,175 @@ convert_I420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src, } static void +convert_A420_pack_ARGB_task (FConvertTask * task) +{ + gint i; + gpointer d[GST_VIDEO_MAX_PLANES]; + + d[0] = FRAME_GET_LINE (task->dest, 0); + d[0] = + (guint8 *) d[0] + + task->out_x * GST_VIDEO_FORMAT_INFO_PSTRIDE (task->dest->info.finfo, 0); + + for (i = task->height_0; i < task->height_1; i++) { + guint8 *sy, *su, *sv, *sa; + + sy = FRAME_GET_Y_LINE (task->src, i + task->in_y); + sy += task->in_x; + su = FRAME_GET_U_LINE (task->src, (i + task->in_y) >> 1); + su += (task->in_x >> 1); + sv = FRAME_GET_V_LINE (task->src, (i + task->in_y) >> 1); + sv += (task->in_x >> 1); + sa = FRAME_GET_A_LINE (task->src, i + task->in_y); + sa += task->in_x; + +#if G_BYTE_ORDER == G_LITTLE_ENDIAN + video_orc_convert_A420_ARGB (task->tmpline, sy, su, sv, sa, + task->data->im[0][0], task->data->im[0][2], + task->data->im[2][1], task->data->im[1][1], task->data->im[1][2], + task->width); +#else + video_orc_convert_A420_BGRA (task->tmpline, sy, su, sv, sa, + task->data->im[0][0], task->data->im[0][2], + task->data->im[2][1], task->data->im[1][1], task->data->im[1][2], + task->width); +#endif + + task->dest->info.finfo->pack_func (task->dest->info.finfo, + (GST_VIDEO_FRAME_IS_INTERLACED (task->dest) ? 
+ GST_VIDEO_PACK_FLAG_INTERLACED : + GST_VIDEO_PACK_FLAG_NONE), + task->tmpline, 0, d, task->dest->info.stride, + task->dest->info.chroma_site, i + task->out_y, task->width); + } +} + +static void +convert_A420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src, + GstVideoFrame * dest) +{ + int i; + gint width = convert->in_width; + gint height = convert->in_height; + MatrixData *data = &convert->convert_matrix; + FConvertTask *tasks; + FConvertTask **tasks_p; + gint n_threads; + gint lines_per_thread; + + n_threads = convert->conversion_runner->n_threads; + tasks = convert->tasks[0] = + g_renew (FConvertTask, convert->tasks[0], n_threads); + tasks_p = convert->tasks_p[0] = + g_renew (FConvertTask *, convert->tasks_p[0], n_threads); + + lines_per_thread = (height + n_threads - 1) / n_threads; + + for (i = 0; i < n_threads; i++) { + tasks[i].src = src; + tasks[i].dest = dest; + + tasks[i].width = width; + tasks[i].data = data; + tasks[i].in_x = convert->in_x; + tasks[i].in_y = convert->in_y; + tasks[i].out_x = convert->out_x; + tasks[i].out_y = convert->out_y; + tasks[i].tmpline = convert->tmpline[i]; + + tasks[i].height_0 = i * lines_per_thread; + tasks[i].height_1 = tasks[i].height_0 + lines_per_thread; + tasks[i].height_1 = MIN (height, tasks[i].height_1); + + tasks_p[i] = &tasks[i]; + } + + gst_parallelized_task_runner_run (convert->conversion_runner, + (GstParallelizedTaskFunc) convert_A420_pack_ARGB_task, + (gpointer) tasks_p); + + convert_fill_border (convert, dest); +} + +static void +convert_A420_BGRA_task (FConvertTask * task) +{ + gint i; + + for (i = task->height_0; i < task->height_1; i++) { + guint8 *sy, *su, *sv, *sa, *d; + + d = FRAME_GET_LINE (task->dest, i + task->out_y); + d += (task->out_x * 4); + sy = FRAME_GET_Y_LINE (task->src, i + task->in_y); + sy += task->in_x; + su = FRAME_GET_U_LINE (task->src, (i + task->in_y) >> 1); + su += (task->in_x >> 1); + sv = FRAME_GET_V_LINE (task->src, (i + task->in_y) >> 1); + sv += (task->in_x 
>> 1); + sa = FRAME_GET_A_LINE (task->src, i + task->in_y); + sa += task->in_x; + +#if G_BYTE_ORDER == G_LITTLE_ENDIAN + video_orc_convert_A420_BGRA (d, sy, su, sv, sa, + task->data->im[0][0], task->data->im[0][2], + task->data->im[2][1], task->data->im[1][1], task->data->im[1][2], + task->width); +#else + video_orc_convert_A420_ARGB (d, sy, su, sv, sa, + task->data->im[0][0], task->data->im[0][2], + task->data->im[2][1], task->data->im[1][1], task->data->im[1][2], + task->width); +#endif + } +} + +static void +convert_A420_BGRA (GstVideoConverter * convert, const GstVideoFrame * src, + GstVideoFrame * dest) +{ + int i; + gint width = convert->in_width; + gint height = convert->in_height; + MatrixData *data = &convert->convert_matrix; + FConvertTask *tasks; + FConvertTask **tasks_p; + gint n_threads; + gint lines_per_thread; + + n_threads = convert->conversion_runner->n_threads; + tasks = convert->tasks[0] = + g_renew (FConvertTask, convert->tasks[0], n_threads); + tasks_p = convert->tasks_p[0] = + g_renew (FConvertTask *, convert->tasks_p[0], n_threads); + + lines_per_thread = (height + n_threads - 1) / n_threads; + + for (i = 0; i < n_threads; i++) { + tasks[i].src = src; + tasks[i].dest = dest; + + tasks[i].width = width; + tasks[i].data = data; + tasks[i].in_x = convert->in_x; + tasks[i].in_y = convert->in_y; + tasks[i].out_x = convert->out_x; + tasks[i].out_y = convert->out_y; + + tasks[i].height_0 = i * lines_per_thread; + tasks[i].height_1 = tasks[i].height_0 + lines_per_thread; + tasks[i].height_1 = MIN (height, tasks[i].height_1); + + tasks_p[i] = &tasks[i]; + } + + gst_parallelized_task_runner_run (convert->conversion_runner, + (GstParallelizedTaskFunc) convert_A420_BGRA_task, (gpointer) tasks_p); + + convert_fill_border (convert, dest); +} + +static void memset_u24 (guint8 * data, guint8 col[3], unsigned int n) { unsigned int i; @@ -7372,6 +7541,28 @@ static const VideoTransform transforms[] = { {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR16, FALSE, 
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_ABGR, FALSE, TRUE, TRUE, TRUE, + TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGBA, FALSE, TRUE, TRUE, TRUE, + TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGRA, FALSE, TRUE, TRUE, TRUE, + TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_BGRA}, + /* A420 to non-alpha RGB formats, reuse I420_* method */ + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGRx, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_xBGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGBx, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGB, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGB15, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + /* scalers */ {GST_VIDEO_FORMAT_GBR, GST_VIDEO_FORMAT_GBR, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, 0, 0, convert_scale_planes}, diff --git a/gst-libs/gst/video/video-orc-dist.c b/gst-libs/gst/video/video-orc-dist.c index 809637f1d..f8ca3f5f3 100644 --- a/gst-libs/gst/video/video-orc-dist.c +++ b/gst-libs/gst/video/video-orc-dist.c @@ -360,6 +360,14 @@ void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int 
p2, int p3, int p4, int p5, int n); +void video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, + int p2, int p3, int p4, int p5, int n); +void video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, + int p2, int p3, int p4, int p5, int n); void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n); @@ -24295,6 +24303,920 @@ video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, #endif +/* video_orc_convert_A420_ARGB */ +#ifdef DISABLE_ORC +void +video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, + int p2, int p3, int p4, int p5, int n) +{ + int i; + orc_union32 *ORC_RESTRICT ptr0; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + const orc_int8 *ORC_RESTRICT ptr6; + const orc_int8 *ORC_RESTRICT ptr7; + orc_int8 var43; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_int8 var44; +#else + orc_int8 var44; +#endif + orc_int8 var45; + orc_union16 var46; + orc_union16 var47; + orc_union16 var48; + orc_union16 var49; + orc_union16 var50; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var51; +#else + orc_union32 var51; +#endif + orc_union32 var52; + orc_int8 var53; + orc_union16 var54; + orc_int8 var55; + orc_int8 var56; + orc_union16 var57; + orc_int8 var58; + orc_int8 var59; + orc_union16 var60; + orc_int8 var61; + orc_union16 var62; + orc_union16 var63; + orc_union16 var64; + orc_int8 var65; + orc_union16 var66; + orc_union16 
var67; + orc_union16 var68; + orc_int8 var69; + orc_union16 var70; + orc_union16 var71; + orc_union16 var72; + orc_union16 var73; + orc_int8 var74; + orc_union16 var75; + orc_union32 var76; + + ptr0 = (orc_union32 *) d1; + ptr4 = (orc_int8 *) s1; + ptr5 = (orc_int8 *) s2; + ptr6 = (orc_int8 *) s3; + ptr7 = (orc_int8 *) s4; + + /* 1: loadpb */ + var44 = 0x00000080; /* 128 or 6.32404e-322f */ + /* 12: loadpw */ + var46.i = p1; + /* 14: loadpw */ + var47.i = p2; + /* 19: loadpw */ + var48.i = p3; + /* 23: loadpw */ + var49.i = p4; + /* 26: loadpw */ + var50.i = p5; + /* 32: loadpb */ + var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */ + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var43 = ptr4[i]; + /* 2: subb */ + var53 = var43 - var44; + /* 3: splatbw */ + var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff); + /* 4: loadupdb */ + var55 = ptr5[i >> 1]; + /* 5: subb */ + var56 = var55 - var44; + /* 6: splatbw */ + var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff); + /* 7: loadupdb */ + var58 = ptr6[i >> 1]; + /* 8: subb */ + var59 = var58 - var44; + /* 9: splatbw */ + var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff); + /* 10: loadb */ + var45 = ptr7[i]; + /* 11: subb */ + var61 = var45 - var44; + /* 13: mulhsw */ + var62.i = (var54.i * var46.i) >> 16; + /* 15: mulhsw */ + var63.i = (var60.i * var47.i) >> 16; + /* 16: addw */ + var64.i = var62.i + var63.i; + /* 17: convssswb */ + var65 = ORC_CLAMP_SB (var64.i); + /* 18: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var61; + _dest.x2[1] = var65; + var66.i = _dest.i; + } + /* 20: mulhsw */ + var67.i = (var57.i * var48.i) >> 16; + /* 21: addw */ + var68.i = var62.i + var67.i; + /* 22: convssswb */ + var69 = ORC_CLAMP_SB (var68.i); + /* 24: mulhsw */ + var70.i = (var57.i * var49.i) >> 16; + /* 25: addw */ + var71.i = var62.i + var70.i; + /* 27: mulhsw */ 
+ var72.i = (var60.i * var50.i) >> 16; + /* 28: addw */ + var73.i = var71.i + var72.i; + /* 29: convssswb */ + var74 = ORC_CLAMP_SB (var73.i); + /* 30: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var74; + _dest.x2[1] = var69; + var75.i = _dest.i; + } + /* 31: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var66.i; + _dest.x2[1] = var75.i; + var76.i = _dest.i; + } + /* 33: addb */ + var52.x4[0] = var76.x4[0] + var51.x4[0]; + var52.x4[1] = var76.x4[1] + var51.x4[1]; + var52.x4[2] = var76.x4[2] + var51.x4[2]; + var52.x4[3] = var76.x4[3] + var51.x4[3]; + /* 34: storel */ + ptr0[i] = var52; + } + +} + +#else +static void +_backup_video_orc_convert_A420_ARGB (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int n = ex->n; + orc_union32 *ORC_RESTRICT ptr0; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + const orc_int8 *ORC_RESTRICT ptr6; + const orc_int8 *ORC_RESTRICT ptr7; + orc_int8 var43; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_int8 var44; +#else + orc_int8 var44; +#endif + orc_int8 var45; + orc_union16 var46; + orc_union16 var47; + orc_union16 var48; + orc_union16 var49; + orc_union16 var50; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var51; +#else + orc_union32 var51; +#endif + orc_union32 var52; + orc_int8 var53; + orc_union16 var54; + orc_int8 var55; + orc_int8 var56; + orc_union16 var57; + orc_int8 var58; + orc_int8 var59; + orc_union16 var60; + orc_int8 var61; + orc_union16 var62; + orc_union16 var63; + orc_union16 var64; + orc_int8 var65; + orc_union16 var66; + orc_union16 var67; + orc_union16 var68; + orc_int8 var69; + orc_union16 var70; + orc_union16 var71; + orc_union16 var72; + orc_union16 var73; + orc_int8 var74; + orc_union16 var75; + orc_union32 var76; + + ptr0 = (orc_union32 *) ex->arrays[0]; + ptr4 = (orc_int8 *) ex->arrays[4]; + ptr5 = (orc_int8 *) ex->arrays[5]; + ptr6 = (orc_int8 *) 
ex->arrays[6]; + ptr7 = (orc_int8 *) ex->arrays[7]; + + /* 1: loadpb */ + var44 = 0x00000080; /* 128 or 6.32404e-322f */ + /* 12: loadpw */ + var46.i = ex->params[24]; + /* 14: loadpw */ + var47.i = ex->params[25]; + /* 19: loadpw */ + var48.i = ex->params[26]; + /* 23: loadpw */ + var49.i = ex->params[27]; + /* 26: loadpw */ + var50.i = ex->params[28]; + /* 32: loadpb */ + var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */ + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var43 = ptr4[i]; + /* 2: subb */ + var53 = var43 - var44; + /* 3: splatbw */ + var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff); + /* 4: loadupdb */ + var55 = ptr5[i >> 1]; + /* 5: subb */ + var56 = var55 - var44; + /* 6: splatbw */ + var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff); + /* 7: loadupdb */ + var58 = ptr6[i >> 1]; + /* 8: subb */ + var59 = var58 - var44; + /* 9: splatbw */ + var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff); + /* 10: loadb */ + var45 = ptr7[i]; + /* 11: subb */ + var61 = var45 - var44; + /* 13: mulhsw */ + var62.i = (var54.i * var46.i) >> 16; + /* 15: mulhsw */ + var63.i = (var60.i * var47.i) >> 16; + /* 16: addw */ + var64.i = var62.i + var63.i; + /* 17: convssswb */ + var65 = ORC_CLAMP_SB (var64.i); + /* 18: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var61; + _dest.x2[1] = var65; + var66.i = _dest.i; + } + /* 20: mulhsw */ + var67.i = (var57.i * var48.i) >> 16; + /* 21: addw */ + var68.i = var62.i + var67.i; + /* 22: convssswb */ + var69 = ORC_CLAMP_SB (var68.i); + /* 24: mulhsw */ + var70.i = (var57.i * var49.i) >> 16; + /* 25: addw */ + var71.i = var62.i + var70.i; + /* 27: mulhsw */ + var72.i = (var60.i * var50.i) >> 16; + /* 28: addw */ + var73.i = var71.i + var72.i; + /* 29: convssswb */ + var74 = ORC_CLAMP_SB (var73.i); + /* 30: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var74; + 
_dest.x2[1] = var69; + var75.i = _dest.i; + } + /* 31: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var66.i; + _dest.x2[1] = var75.i; + var76.i = _dest.i; + } + /* 33: addb */ + var52.x4[0] = var76.x4[0] + var51.x4[0]; + var52.x4[1] = var76.x4[1] + var51.x4[1]; + var52.x4[2] = var76.x4[2] + var51.x4[2]; + var52.x4[3] = var76.x4[3] + var51.x4[3]; + /* 34: storel */ + ptr0[i] = var52; + } + +} + +void +video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, + int p2, int p3, int p4, int p5, int n) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110, + 118, 101, 114, 116, 95, 65, 52, 50, 48, 95, 65, 82, 71, 66, 11, 4, + 4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128, + 0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2, 20, 2, 20, + 2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1, 20, 1, 20, + 1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38, 38, + 16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 65, 41, + 7, 16, 90, 32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38, + 35, 196, 35, 41, 38, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37, + 90, 36, 33, 27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32, + 159, 39, 36, 196, 37, 39, 40, 195, 42, 35, 37, 21, 2, 33, 0, 42, + 16, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_video_orc_convert_A420_ARGB); +#else + p = orc_program_new (); + orc_program_set_name (p, "video_orc_convert_A420_ARGB"); + orc_program_set_backup_function (p, _backup_video_orc_convert_A420_ARGB); + orc_program_add_destination (p, 4, "d1"); + orc_program_add_source (p, 1, 
"s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_source (p, 1, "s3"); + orc_program_add_source (p, 1, "s4"); + orc_program_add_constant (p, 1, 0x00000080, "c1"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_parameter (p, 2, "p2"); + orc_program_add_parameter (p, 2, "p3"); + orc_program_add_parameter (p, 2, "p4"); + orc_program_add_parameter (p, 2, "p5"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 2, "t3"); + orc_program_add_temporary (p, 2, "t4"); + orc_program_add_temporary (p, 2, "t5"); + orc_program_add_temporary (p, 2, "t6"); + orc_program_add_temporary (p, 1, "t7"); + orc_program_add_temporary (p, 1, "t8"); + orc_program_add_temporary (p, 1, "t9"); + orc_program_add_temporary (p, 1, "t10"); + orc_program_add_temporary (p, 4, "t11"); + + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subb", 0, ORC_VAR_T10, ORC_VAR_S4, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, 
ORC_VAR_T4, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_T10, + ORC_VAR_T7, ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_T9, + ORC_VAR_D1); + orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T11, ORC_VAR_T4, + ORC_VAR_T6, ORC_VAR_D1); + orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T11, ORC_VAR_C1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->arrays[ORC_VAR_S2] = (void *) s2; + ex->arrays[ORC_VAR_S3] = (void *) s3; + ex->arrays[ORC_VAR_S4] = (void *) s4; + ex->params[ORC_VAR_P1] = p1; + ex->params[ORC_VAR_P2] = p2; + ex->params[ORC_VAR_P3] = p3; + ex->params[ORC_VAR_P4] = p4; + ex->params[ORC_VAR_P5] = p5; + + func = c->exec; + func (ex); +} +#endif + + +/* video_orc_convert_A420_BGRA */ +#ifdef DISABLE_ORC +void +video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, 
+ int p2, int p3, int p4, int p5, int n) +{ + int i; + orc_union32 *ORC_RESTRICT ptr0; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + const orc_int8 *ORC_RESTRICT ptr6; + const orc_int8 *ORC_RESTRICT ptr7; + orc_int8 var43; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_int8 var44; +#else + orc_int8 var44; +#endif + orc_int8 var45; + orc_union16 var46; + orc_union16 var47; + orc_union16 var48; + orc_union16 var49; + orc_union16 var50; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var51; +#else + orc_union32 var51; +#endif + orc_union32 var52; + orc_int8 var53; + orc_union16 var54; + orc_int8 var55; + orc_int8 var56; + orc_union16 var57; + orc_int8 var58; + orc_int8 var59; + orc_union16 var60; + orc_int8 var61; + orc_union16 var62; + orc_union16 var63; + orc_union16 var64; + orc_int8 var65; + orc_union16 var66; + orc_union16 var67; + orc_union16 var68; + orc_int8 var69; + orc_union16 var70; + orc_union16 var71; + orc_union16 var72; + orc_union16 var73; + orc_int8 var74; + orc_union16 var75; + orc_union32 var76; + + ptr0 = (orc_union32 *) d1; + ptr4 = (orc_int8 *) s1; + ptr5 = (orc_int8 *) s2; + ptr6 = (orc_int8 *) s3; + ptr7 = (orc_int8 *) s4; + + /* 1: loadpb */ + var44 = 0x00000080; /* 128 or 6.32404e-322f */ + /* 12: loadpw */ + var46.i = p1; + /* 14: loadpw */ + var47.i = p2; + /* 19: loadpw */ + var48.i = p3; + /* 23: loadpw */ + var49.i = p4; + /* 26: loadpw */ + var50.i = p5; + /* 32: loadpb */ + var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */ + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var43 = ptr4[i]; + /* 2: subb */ + var53 = var43 - var44; + /* 3: splatbw */ + var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff); + /* 4: loadupdb */ + var55 = 
ptr5[i >> 1]; + /* 5: subb */ + var56 = var55 - var44; + /* 6: splatbw */ + var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff); + /* 7: loadupdb */ + var58 = ptr6[i >> 1]; + /* 8: subb */ + var59 = var58 - var44; + /* 9: splatbw */ + var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff); + /* 10: loadb */ + var45 = ptr7[i]; + /* 11: subb */ + var61 = var45 - var44; + /* 13: mulhsw */ + var62.i = (var54.i * var46.i) >> 16; + /* 15: mulhsw */ + var63.i = (var60.i * var47.i) >> 16; + /* 16: addw */ + var64.i = var62.i + var63.i; + /* 17: convssswb */ + var65 = ORC_CLAMP_SB (var64.i); + /* 18: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var65; + _dest.x2[1] = var61; + var66.i = _dest.i; + } + /* 20: mulhsw */ + var67.i = (var57.i * var48.i) >> 16; + /* 21: addw */ + var68.i = var62.i + var67.i; + /* 22: convssswb */ + var69 = ORC_CLAMP_SB (var68.i); + /* 24: mulhsw */ + var70.i = (var57.i * var49.i) >> 16; + /* 25: addw */ + var71.i = var62.i + var70.i; + /* 27: mulhsw */ + var72.i = (var60.i * var50.i) >> 16; + /* 28: addw */ + var73.i = var71.i + var72.i; + /* 29: convssswb */ + var74 = ORC_CLAMP_SB (var73.i); + /* 30: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var69; + _dest.x2[1] = var74; + var75.i = _dest.i; + } + /* 31: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var75.i; + _dest.x2[1] = var66.i; + var76.i = _dest.i; + } + /* 33: addb */ + var52.x4[0] = var76.x4[0] + var51.x4[0]; + var52.x4[1] = var76.x4[1] + var51.x4[1]; + var52.x4[2] = var76.x4[2] + var51.x4[2]; + var52.x4[3] = var76.x4[3] + var51.x4[3]; + /* 34: storel */ + ptr0[i] = var52; + } + +} + +#else +static void +_backup_video_orc_convert_A420_BGRA (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int n = ex->n; + orc_union32 *ORC_RESTRICT ptr0; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + const orc_int8 *ORC_RESTRICT ptr6; + const orc_int8 *ORC_RESTRICT ptr7; + orc_int8 var43; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined 
(__i386__) + volatile orc_int8 var44; +#else + orc_int8 var44; +#endif + orc_int8 var45; + orc_union16 var46; + orc_union16 var47; + orc_union16 var48; + orc_union16 var49; + orc_union16 var50; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var51; +#else + orc_union32 var51; +#endif + orc_union32 var52; + orc_int8 var53; + orc_union16 var54; + orc_int8 var55; + orc_int8 var56; + orc_union16 var57; + orc_int8 var58; + orc_int8 var59; + orc_union16 var60; + orc_int8 var61; + orc_union16 var62; + orc_union16 var63; + orc_union16 var64; + orc_int8 var65; + orc_union16 var66; + orc_union16 var67; + orc_union16 var68; + orc_int8 var69; + orc_union16 var70; + orc_union16 var71; + orc_union16 var72; + orc_union16 var73; + orc_int8 var74; + orc_union16 var75; + orc_union32 var76; + + ptr0 = (orc_union32 *) ex->arrays[0]; + ptr4 = (orc_int8 *) ex->arrays[4]; + ptr5 = (orc_int8 *) ex->arrays[5]; + ptr6 = (orc_int8 *) ex->arrays[6]; + ptr7 = (orc_int8 *) ex->arrays[7]; + + /* 1: loadpb */ + var44 = 0x00000080; /* 128 or 6.32404e-322f */ + /* 12: loadpw */ + var46.i = ex->params[24]; + /* 14: loadpw */ + var47.i = ex->params[25]; + /* 19: loadpw */ + var48.i = ex->params[26]; + /* 23: loadpw */ + var49.i = ex->params[27]; + /* 26: loadpw */ + var50.i = ex->params[28]; + /* 32: loadpb */ + var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */ + var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */ + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var43 = ptr4[i]; + /* 2: subb */ + var53 = var43 - var44; + /* 3: splatbw */ + var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff); + /* 4: loadupdb */ + var55 = ptr5[i >> 1]; + /* 5: subb */ + var56 = var55 - var44; + /* 6: splatbw */ + var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff); + /* 7: loadupdb */ + var58 = ptr6[i >> 1]; + /* 8: subb */ + var59 = var58 - var44; + 
/* 9: splatbw */ + var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff); + /* 10: loadb */ + var45 = ptr7[i]; + /* 11: subb */ + var61 = var45 - var44; + /* 13: mulhsw */ + var62.i = (var54.i * var46.i) >> 16; + /* 15: mulhsw */ + var63.i = (var60.i * var47.i) >> 16; + /* 16: addw */ + var64.i = var62.i + var63.i; + /* 17: convssswb */ + var65 = ORC_CLAMP_SB (var64.i); + /* 18: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var65; + _dest.x2[1] = var61; + var66.i = _dest.i; + } + /* 20: mulhsw */ + var67.i = (var57.i * var48.i) >> 16; + /* 21: addw */ + var68.i = var62.i + var67.i; + /* 22: convssswb */ + var69 = ORC_CLAMP_SB (var68.i); + /* 24: mulhsw */ + var70.i = (var57.i * var49.i) >> 16; + /* 25: addw */ + var71.i = var62.i + var70.i; + /* 27: mulhsw */ + var72.i = (var60.i * var50.i) >> 16; + /* 28: addw */ + var73.i = var71.i + var72.i; + /* 29: convssswb */ + var74 = ORC_CLAMP_SB (var73.i); + /* 30: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var69; + _dest.x2[1] = var74; + var75.i = _dest.i; + } + /* 31: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var75.i; + _dest.x2[1] = var66.i; + var76.i = _dest.i; + } + /* 33: addb */ + var52.x4[0] = var76.x4[0] + var51.x4[0]; + var52.x4[1] = var76.x4[1] + var51.x4[1]; + var52.x4[2] = var76.x4[2] + var51.x4[2]; + var52.x4[3] = var76.x4[3] + var51.x4[3]; + /* 34: storel */ + ptr0[i] = var52; + } + +} + /* video_orc_convert_A420_BGRA: Orc executor entry point. The first call builds the Orc program from the static bytecode below, compiles it and keeps the resulting OrcCode in the function-local static c; every call then fills an OrcExecutor with the arguments and invokes the compiled code. d1 is the destination (4 bytes per element), s1..s4 are the four sources (1 byte per element; declared as y, u, v, a in video-orc.orc), p1..p5 are the five 2-byte parameters and n is the element count stored in ex->n. */ +void +video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, + int p2, int p3, int p4, int p5, int n) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + /* p_inited is tested once without the lock and again after orc_once_mutex_lock (), so the program is only built while the flag is still clear and the mutex is held. */ + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + /* The #if 1 branch loads the program from bytecode; the #else branch is compiled out and assembles it through individual orc_program_* calls. */ +#if 1 + static const orc_uint8 bc[] = { + /* The 27 in the first row is followed by the 27 ASCII codes of the name video_orc_convert_A420_BGRA. */ 1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110, + 118, 101, 114, 116, 95, 65, 52, 50, 48, 
95, 66, 71, 82, 65, 11, 4, + 4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128, + 0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2, 20, 2, 20, + 2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1, 20, 1, 20, + 1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38, 38, + 16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 65, 41, + 7, 16, 90, 32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38, + 35, 196, 35, 38, 41, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37, + 90, 36, 33, 27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32, + 159, 39, 36, 196, 37, 40, 39, 195, 42, 37, 35, 21, 2, 33, 0, 42, + 16, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + /* Registers the C implementation _backup_video_orc_convert_A420_BGRA with the program; presumably the fallback when no compiled code is available (confirm against the Orc documentation). */ orc_program_set_backup_function (p, _backup_video_orc_convert_A420_BGRA); +#else + /* Compiled out by the #if 1 above. Declares one 4-byte destination, four 1-byte sources, the 1-byte constant 0x80 (c1), five 2-byte parameters and eleven temporaries, then appends the instructions one by one. */ p = orc_program_new (); + orc_program_set_name (p, "video_orc_convert_A420_BGRA"); + orc_program_set_backup_function (p, _backup_video_orc_convert_A420_BGRA); + orc_program_add_destination (p, 4, "d1"); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_source (p, 1, "s3"); + orc_program_add_source (p, 1, "s4"); + orc_program_add_constant (p, 1, 0x00000080, "c1"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_parameter (p, 2, "p2"); + orc_program_add_parameter (p, 2, "p3"); + orc_program_add_parameter (p, 2, "p4"); + orc_program_add_parameter (p, 2, "p5"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 2, "t3"); + orc_program_add_temporary (p, 2, "t4"); + orc_program_add_temporary (p, 2, "t5"); + orc_program_add_temporary (p, 2, "t6"); + orc_program_add_temporary (p, 1, "t7"); + orc_program_add_temporary (p, 1, "t8"); + orc_program_add_temporary (p, 1, "t9"); + orc_program_add_temporary (p, 1, "t10"); + orc_program_add_temporary (p, 4, "t11"); + /* The opcode order below is the same as in the video_orc_convert_A420_BGRA listing of video-orc.orc. */ + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, 
ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + /* s4 only has c1 subtracted; t10 is never multiplied and is consumed directly by the first mergebw. */ orc_program_append_2 (p, "subb", 0, ORC_VAR_T10, ORC_VAR_S4, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, ORC_VAR_T4, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_T7, + ORC_VAR_T10, ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T9, ORC_VAR_T8, + ORC_VAR_D1); + orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T11, 
ORC_VAR_T6, + ORC_VAR_T4, ORC_VAR_D1); + /* The third argument is 2 here and 0 for every other instruction; this is the instruction written with the x4 prefix in video-orc.orc. */ orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T11, ORC_VAR_C1, + ORC_VAR_D1); +#endif + /* Compile the program, keep only the resulting OrcCode in the static c, then free the program object. */ + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + /* The compiled code is handed to the executor through arrays[ORC_VAR_A2]; the program pointer is cleared. */ ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + /* Bind the element count, destination, sources and parameters. The casts on s1..s4 only drop the const qualifier. */ + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->arrays[ORC_VAR_S2] = (void *) s2; + ex->arrays[ORC_VAR_S3] = (void *) s3; + ex->arrays[ORC_VAR_S4] = (void *) s4; + ex->params[ORC_VAR_P1] = p1; + ex->params[ORC_VAR_P2] = p2; + ex->params[ORC_VAR_P3] = p3; + ex->params[ORC_VAR_P4] = p4; + ex->params[ORC_VAR_P5] = p5; + /* Call the entry point stored in the compiled code. */ + func = c->exec; + func (ex); +} +#endif + + /* video_orc_matrix8 */ #ifdef DISABLE_ORC void diff --git a/gst-libs/gst/video/video-orc-dist.h b/gst-libs/gst/video/video-orc-dist.h index 19fb7aae5..4f060205b 100644 --- a/gst-libs/gst/video/video-orc-dist.h +++ b/gst-libs/gst/video/video-orc-dist.h @@ -1,8 +1,7 @@ /* autogenerated from video-orc.orc */ -#ifndef _VIDEO_ORC_H_ -#define _VIDEO_ORC_H_ +#pragma once #include <glib.h> @@ -189,6 +188,8 @@ void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m); void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n); void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n); +void video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, 
int p1, int p2, int p3, int p4, int p5, int n); /* video_orc_convert_A420_BGRA: s1..s4 are the y, u, v and a sources of the .orc definition of the same name, p1..p5 are its five 2-byte .param values, and n is the element count (video-converter.c passes task->width). The u and v sources are read with loadupdb while y and a are read directly; presumably each chroma byte serves two output elements (confirm against the Orc opcode reference). */ +void video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, int p2, int p3, int p4, int p5, int n); void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n); void _custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n); void video_orc_resample_h_near_u32_lq (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int p1, int p2, int n); @@ -242,5 +243,3 @@ void video_orc_convert_UYVY_GRAY8 (guint8 * ORC_RESTRICT d1, int d1_stride, cons } #endif -#endif - diff --git a/gst-libs/gst/video/video-orc.orc b/gst-libs/gst/video/video-orc.orc index 03a302f1e..4d88844ba 100644 --- a/gst-libs/gst/video/video-orc.orc +++ b/gst-libs/gst/video/video-orc.orc @@ -1876,6 +1876,118 @@ mergebw wb, g, b mergewl x, wr, wb x4 addb argb, x, c4128 +.function video_orc_convert_A420_ARGB +.dest 4 argb guint8 +.source 1 y guint8 +.source 1 u guint8 +.source 1 v guint8 +.source 1 a guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.param 2 p5 +.temp 2 wy +.temp 2 wu +.temp 2 wv +.temp 2 wr +.temp 2 wg +.temp 2 wb +.temp 1 r +.temp 1 g +.temp 1 b +.temp 1 da +.temp 4 x +.const 1 c128 128 + +subb r, y, c128 +splatbw wy, r +loadupdb r, u +subb r, r, c128 +splatbw wu, r +loadupdb r, v +subb r, r, c128 +splatbw wv, r +subb da, a, c128 + +mulhsw wy, wy, p1 + +mulhsw wr, wv, p2 +addw wr, wy, wr +convssswb r, wr +mergebw wr, da, r + +mulhsw wb, wu, p3 +addw wb, wy, wb +convssswb b, wb + +mulhsw wg, wu, p4 +addw wg, wy, wg +mulhsw wy, wv, p5 +addw wg, wg, wy + +convssswb g, wg + +mergebw wb, g, b +mergewl x, wr, wb +x4 addb argb, x, c128 + +.function video_orc_convert_A420_BGRA +.dest 4 argb guint8 +.source 1 y guint8 +.source 1 
u guint8 +.source 1 v guint8 +.source 1 a guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.param 2 p5 +.temp 2 wy +.temp 2 wu +.temp 2 wv +.temp 2 wr +.temp 2 wg +.temp 2 wb +.temp 1 r +.temp 1 g +.temp 1 b +.temp 1 da +.temp 4 x +.const 1 c128 128 + +subb r, y, c128 +splatbw wy, r +loadupdb r, u +subb r, r, c128 +splatbw wu, r +loadupdb r, v +subb r, r, c128 +splatbw wv, r +subb da, a, c128 + +mulhsw wy, wy, p1 + +mulhsw wr, wv, p2 +addw wr, wy, wr +convssswb r, wr +mergebw wr, r, da + +mulhsw wb, wu, p3 +addw wb, wy, wb +convssswb b, wb + +mulhsw wg, wu, p4 +addw wg, wy, wg +mulhsw wy, wv, p5 +addw wg, wg, wy + +convssswb g, wg + +mergebw wb, b, g +mergewl x, wb, wr +x4 addb argb, x, c128 + .function video_orc_matrix8 .backup _custom_video_orc_matrix8 .source 4 argb guint8 |