author | Vincent Penquerc'h <ogg.k.ogg.k@googlemail.com> | 2011-09-29 22:43:30 +0100
committer | Vincent Penquerc'h <vincent.penquerch@collabora.co.uk> | 2011-11-28 15:28:39 +0000
commit | 9eb79984a88d7eac96806c8b9999f16fa90c80a6 (patch)
tree | 664eed9f3a853fdcfc3efed987f50d1039fc4956 /ext/kate
parent | 4735a7554b7aebecc6f7469265240b9a4ba4a4ba (diff)
download | gstreamer-plugins-bad-9eb79984a88d7eac96806c8b9999f16fa90c80a6.tar.gz
kate: support for rendering on several YUV formats
This speeds up rendering a fair bit by not requiring colorspace
conversion, whether there is anything to overlay or not.
The blending code was nicked from textoverlay. I would think
this might be a helpful thing to put in, say, libgstvideo at
some point.
https://bugzilla.gnome.org/show_bug.cgi?id=660528
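For context: the overlay is still rendered by Tiger as ARGB into a scratch buffer; what the patch adds is a per-plane blend of that ARGB image onto the YUV frame using fixed-point BT.601 conversion, so no colorspace conversion element is needed around the element. The sketch below shows the arithmetic the patch's COMP_Y and BLEND macros perform for a single luma sample. It is a minimal illustration only; blend_luma is a hypothetical standalone helper, not a function from the patch.

#include <glib.h>

/* Fixed-point BT.601 luma conversion followed by a source-over blend,
 * mirroring the COMP_Y and BLEND macros added in this commit:
 * 19595 ~= 0.299 * 65536, 38470 ~= 0.587 * 65536, 7471 ~= 0.114 * 65536,
 * so each product shifted right by 16 recovers the weighted contribution. */
static guchar
blend_luma (guchar dest_y, guchar r, guchar g, guchar b, guchar alpha)
{
  gint y = ((19595 * r) >> 16) + ((38470 * g) >> 16) + ((7471 * b) >> 16);

  y = CLAMP (y, 0, 255);
  /* blend the overlay luma over the existing video sample by its alpha */
  return (y * alpha + dest_y * (255 - alpha)) / 255;
}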
Diffstat (limited to 'ext/kate')
-rw-r--r-- | ext/kate/gstkatetiger.c | 487
-rw-r--r-- | ext/kate/gstkatetiger.h | 3
2 files changed, 483 insertions, 7 deletions
diff --git a/ext/kate/gstkatetiger.c b/ext/kate/gstkatetiger.c
index 39821c336..8cdd7a7c0 100644
--- a/ext/kate/gstkatetiger.c
+++ b/ext/kate/gstkatetiger.c
@@ -3,6 +3,7 @@
  * Copyright 2005 Thomas Vander Stichele <thomas@apestaart.org>
  * Copyright 2005 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
  * Copyright 2008 Vincent Penquerc'h <ogg.k.ogg.k@googlemail.com>
+ * Copyright (C) <2009> Young-Ho Cha <ganadist@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -125,6 +126,57 @@ enum
   ARG_SILENT
 };
 
+/* RGB -> YUV blitting routines taken from textoverlay,
+   original code from Young-Ho Cha <ganadist@gmail.com> */
+
+#define COMP_Y(ret, r, g, b) \
+{ \
+  ret = (int) (((19595 * r) >> 16) + ((38470 * g) >> 16) + ((7471 * b) >> 16)); \
+  ret = CLAMP (ret, 0, 255); \
+}
+
+#define COMP_U(ret, r, g, b) \
+{ \
+  ret = (int) (-((11059 * r) >> 16) - ((21709 * g) >> 16) + ((32768 * b) >> 16) + 128); \
+  ret = CLAMP (ret, 0, 255); \
+}
+
+#define COMP_V(ret, r, g, b) \
+{ \
+  ret = (int) (((32768 * r) >> 16) - ((27439 * g) >> 16) - ((5329 * b) >> 16) + 128); \
+  ret = CLAMP (ret, 0, 255); \
+}
+
+#define BLEND(ret, alpha, v0, v1) \
+{ \
+  ret = (v0 * alpha + v1 * (255 - alpha)) / 255; \
+}
+
+#define OVER(ret, alphaA, Ca, alphaB, Cb, alphaNew) \
+{ \
+  gint _tmp; \
+  _tmp = (Ca * alphaA + Cb * alphaB * (255 - alphaA) / 255) / alphaNew; \
+  ret = CLAMP (_tmp, 0, 255); \
+}
+
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+# define TIGER_ARGB_A 3
+# define TIGER_ARGB_R 2
+# define TIGER_ARGB_G 1
+# define TIGER_ARGB_B 0
+#else
+# define TIGER_ARGB_A 0
+# define TIGER_ARGB_R 1
+# define TIGER_ARGB_G 2
+# define TIGER_ARGB_B 3
+#endif
+
+#define TIGER_UNPREMULTIPLY(a,r,g,b) G_STMT_START { \
+  b = (a > 0) ? MIN ((b * 255 + a / 2) / a, 255) : 0; \
+  g = (a > 0) ? MIN ((g * 255 + a / 2) / a, 255) : 0; \
+  r = (a > 0) ? MIN ((r * 255 + a / 2) / a, 255) : 0; \
+} G_STMT_END
+
 static GstStaticPadTemplate kate_sink_factory =
 GST_STATIC_PAD_TEMPLATE ("subtitle_sink",
     GST_PAD_SINK,
@@ -134,12 +186,12 @@ static GstStaticPadTemplate kate_sink_factory =
 
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
 #define TIGER_VIDEO_CAPS \
-    GST_VIDEO_CAPS_xRGB ", endianness = (int)1234; " \
-    GST_VIDEO_CAPS_BGRx ", endianness = (int)4321"
+    GST_VIDEO_CAPS_xRGB ";" GST_VIDEO_CAPS_BGRx ";" \
+    GST_VIDEO_CAPS_YUV ("{AYUV, I420, YV12, UYVY, NV12, NV21}")
 #else
 #define TIGER_VIDEO_CAPS \
-    GST_VIDEO_CAPS_BGRx ", endianness = (int)4321; " \
-    GST_VIDEO_CAPS_xRGB ", endianness = (int)1234"
+    GST_VIDEO_CAPS_BGRx ";" GST_VIDEO_CAPS_xRGB ";" \
+    GST_VIDEO_CAPS_YUV ("{AYUV, I420, YV12, UYVY, NV12, NV21}")
 #endif
 
 static GstStaticPadTemplate video_sink_factory =
@@ -379,6 +431,9 @@ gst_kate_tiger_dispose (GObject * object)
     tiger->default_font_desc = NULL;
   }
 
+  g_free (tiger->render_buffer);
+  tiger->render_buffer = NULL;
+
   g_cond_free (tiger->cond);
   tiger->cond = NULL;
 
@@ -707,6 +762,7 @@ gst_kate_tiger_video_set_caps (GstPad * pad, GstCaps * caps)
 
   tiger->swap_rgb = FALSE;
   if (gst_video_format_parse_caps (caps, &format, &w, &h)) {
+    tiger->video_format = format;
     tiger->video_width = w;
     tiger->video_height = h;
   }
@@ -731,6 +787,405 @@ gst_kate_tiger_get_time (GstKateTiger * tiger)
   return pos / (gdouble) GST_SECOND;
 }
 
+static inline void
+gst_kate_tiger_blit_1 (GstKateTiger * tiger, guchar * dest, gint xpos,
+    gint ypos, const guint8 * image, gint image_width, gint image_height,
+    guint dest_stride)
+{
+  gint i, j = 0;
+  gint x, y;
+  guchar r, g, b, a;
+  const guint8 *pimage;
+  guchar *py;
+  gint width = image_width;
+  gint height = image_height;
+
+  if (xpos < 0) {
+    xpos = 0;
+  }
+
+  if (xpos + width > tiger->video_width) {
+    width = tiger->video_width - xpos;
+  }
+
+  if (ypos + height > tiger->video_height) {
+    height = tiger->video_height - ypos;
+  }
+
+  dest += (ypos / 1) * dest_stride;
+
+  for (i = 0; i < height; i++) {
+    pimage = image + 4 * (i * image_width);
+    py = dest + i * dest_stride + xpos;
+    for (j = 0; j < width; j++) {
+      b = pimage[TIGER_ARGB_B];
+      g = pimage[TIGER_ARGB_G];
+      r = pimage[TIGER_ARGB_R];
+      a = pimage[TIGER_ARGB_A];
+      TIGER_UNPREMULTIPLY (a, r, g, b);
+
+      pimage += 4;
+      if (a == 0) {
+        py++;
+        continue;
+      }
+      COMP_Y (y, r, g, b);
+      x = *py;
+      BLEND (*py++, a, y, x);
+    }
+  }
+}
+
+static inline void
+gst_kate_tiger_blit_sub2x2cbcr (GstKateTiger * tiger,
+    guchar * destcb, guchar * destcr, gint xpos, gint ypos,
+    const guint8 * image, gint image_width, gint image_height,
+    guint destcb_stride, guint destcr_stride, guint pix_stride)
+{
+  gint i, j;
+  gint x, cb, cr;
+  gushort r, g, b, a;
+  gushort r1, g1, b1, a1;
+  const guint8 *pimage1, *pimage2;
+  guchar *pcb, *pcr;
+  gint width = image_width - 2;
+  gint height = image_height - 2;
+
+  xpos *= pix_stride;
+
+  if (xpos < 0) {
+    xpos = 0;
+  }
+
+  if (xpos + width > tiger->video_width) {
+    width = tiger->video_width - xpos;
+  }
+
+  if (ypos + height > tiger->video_height) {
+    height = tiger->video_height - ypos;
+  }
+
+  destcb += (ypos / 2) * destcb_stride;
+  destcr += (ypos / 2) * destcr_stride;
+
+  for (i = 0; i < height; i += 2) {
+    pimage1 = image + 4 * (i * image_width);
+    pimage2 = pimage1 + 4 * image_width;
+    pcb = destcb + (i / 2) * destcb_stride + xpos / 2;
+    pcr = destcr + (i / 2) * destcr_stride + xpos / 2;
+    for (j = 0; j < width; j += 2) {
+      b = pimage1[TIGER_ARGB_B];
+      g = pimage1[TIGER_ARGB_G];
+      r = pimage1[TIGER_ARGB_R];
+      a = pimage1[TIGER_ARGB_A];
+      TIGER_UNPREMULTIPLY (a, r, g, b);
+      pimage1 += 4;
+
+      b1 = pimage1[TIGER_ARGB_B];
+      g1 = pimage1[TIGER_ARGB_G];
+      r1 = pimage1[TIGER_ARGB_R];
+      a1 = pimage1[TIGER_ARGB_A];
+      TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+      b += b1;
+      g += g1;
+      r += r1;
+      a += a1;
+      pimage1 += 4;
+
+      b1 = pimage2[TIGER_ARGB_B];
+      g1 = pimage2[TIGER_ARGB_G];
+      r1 = pimage2[TIGER_ARGB_R];
+      a1 = pimage2[TIGER_ARGB_A];
+      TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+      b += b1;
+      g += g1;
+      r += r1;
+      a += a1;
+      pimage2 += 4;
+
+      /* + 2 for rounding */
+      b1 = pimage2[TIGER_ARGB_B];
+      g1 = pimage2[TIGER_ARGB_G];
+      r1 = pimage2[TIGER_ARGB_R];
+      a1 = pimage2[TIGER_ARGB_A];
+      TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+      b += b1 + 2;
+      g += g1 + 2;
+      r += r1 + 2;
+      a += a1 + 2;
+      pimage2 += 4;
+
+      b /= 4;
+      g /= 4;
+      r /= 4;
+      a /= 4;
+
+      if (a == 0) {
+        pcb += pix_stride;
+        pcr += pix_stride;
+        continue;
+      }
+      COMP_U (cb, r, g, b);
+      COMP_V (cr, r, g, b);
+
+      x = *pcb;
+      BLEND (*pcb, a, cb, x);
+      x = *pcr;
+      BLEND (*pcr, a, cr, x);
+
+      pcb += pix_stride;
+      pcr += pix_stride;
+    }
+  }
+}
+
+/* FIXME:
+ * - use proper strides and offset for I420
+ */
+
+static inline void
+gst_kate_tiger_blit_NV12_NV21 (GstKateTiger * tiger,
+    guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
+    gint image_width, gint image_height)
+{
+  int y_stride, uv_stride;
+  int u_offset, v_offset;
+  int h, w;
+
+  /* because U/V is 2x2 subsampled, we need to round, either up or down,
+   * to a boundary of integer number of U/V pixels:
+   */
+  xpos = GST_ROUND_UP_2 (xpos);
+  ypos = GST_ROUND_UP_2 (ypos);
+
+  w = tiger->video_width;
+  h = tiger->video_height;
+
+  y_stride = gst_video_format_get_row_stride (tiger->video_format, 0, w);
+  uv_stride = gst_video_format_get_row_stride (tiger->video_format, 1, w);
+  u_offset =
+      gst_video_format_get_component_offset (tiger->video_format, 1, w, h);
+  v_offset =
+      gst_video_format_get_component_offset (tiger->video_format, 2, w, h);
+
+  gst_kate_tiger_blit_1 (tiger, yuv_pixels, xpos, ypos, image, image_width,
+      image_height, y_stride);
+  gst_kate_tiger_blit_sub2x2cbcr (tiger, yuv_pixels + u_offset,
+      yuv_pixels + v_offset, xpos, ypos, image, image_width, image_height,
+      uv_stride, uv_stride, 2);
+}
+
+static inline void
+gst_kate_tiger_blit_I420_YV12 (GstKateTiger * tiger,
+    guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
+    gint image_width, gint image_height)
+{
+  int y_stride, u_stride, v_stride;
+  int u_offset, v_offset;
+  int h, w;
+
+  /* because U/V is 2x2 subsampled, we need to round, either up or down,
+   * to a boundary of integer number of U/V pixels:
+   */
+  xpos = GST_ROUND_UP_2 (xpos);
+  ypos = GST_ROUND_UP_2 (ypos);
+
+  w = tiger->video_width;
+  h = tiger->video_height;
+
+  y_stride = gst_video_format_get_row_stride (tiger->video_format, 0, w);
+  u_stride = gst_video_format_get_row_stride (tiger->video_format, 1, w);
+  v_stride = gst_video_format_get_row_stride (tiger->video_format, 2, w);
+  u_offset =
+      gst_video_format_get_component_offset (tiger->video_format, 1, w, h);
+  v_offset =
+      gst_video_format_get_component_offset (tiger->video_format, 2, w, h);
+
+  gst_kate_tiger_blit_1 (tiger, yuv_pixels, xpos, ypos, image, image_width,
+      image_height, y_stride);
+  gst_kate_tiger_blit_sub2x2cbcr (tiger, yuv_pixels + u_offset,
+      yuv_pixels + v_offset, xpos, ypos, image, image_width, image_height,
+      u_stride, v_stride, 1);
+}
+
+static inline void
+gst_kate_tiger_blit_UYVY (GstKateTiger * tiger,
+    guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
+    gint image_width, gint image_height)
+{
+  int a0, r0, g0, b0;
+  int a1, r1, g1, b1;
+  int y0, y1, u, v;
+  int i, j;
+  int h, w;
+  const guint8 *pimage;
+  guchar *dest;
+
+  /* because U/V is 2x horizontally subsampled, we need to round to a
+   * boundary of integer number of U/V pixels in x dimension:
+   */
+  xpos = GST_ROUND_UP_2 (xpos);
+
+  w = image_width - 2;
+  h = image_height - 2;
+
+  if (xpos < 0) {
+    xpos = 0;
+  }
+
+  if (xpos + w > tiger->video_width) {
+    w = tiger->video_width - xpos;
+  }
+
+  if (ypos + h > tiger->video_height) {
+    h = tiger->video_height - ypos;
+  }
+
+  for (i = 0; i < h; i++) {
+    pimage = image + i * image_width * 4;
+    dest = yuv_pixels + (i + ypos) * tiger->video_width * 2 + xpos * 2;
+    for (j = 0; j < w; j += 2) {
+      b0 = pimage[TIGER_ARGB_B];
+      g0 = pimage[TIGER_ARGB_G];
+      r0 = pimage[TIGER_ARGB_R];
+      a0 = pimage[TIGER_ARGB_A];
+      TIGER_UNPREMULTIPLY (a0, r0, g0, b0);
+      pimage += 4;
+
+      b1 = pimage[TIGER_ARGB_B];
+      g1 = pimage[TIGER_ARGB_G];
+      r1 = pimage[TIGER_ARGB_R];
+      a1 = pimage[TIGER_ARGB_A];
+      TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+      pimage += 4;
+
+      a0 += a1 + 2;
+      a0 /= 2;
+      if (a0 == 0) {
+        dest += 4;
+        continue;
+      }
+
+      COMP_Y (y0, r0, g0, b0);
+      COMP_Y (y1, r1, g1, b1);
+
+      b0 += b1 + 2;
+      g0 += g1 + 2;
+      r0 += r1 + 2;
+
+      b0 /= 2;
+      g0 /= 2;
+      r0 /= 2;
+
+      COMP_U (u, r0, g0, b0);
+      COMP_V (v, r0, g0, b0);
+
+      BLEND (*dest, a0, u, *dest);
+      dest++;
+      BLEND (*dest, a0, y0, *dest);
+      dest++;
+      BLEND (*dest, a0, v, *dest);
+      dest++;
+      BLEND (*dest, a0, y1, *dest);
+      dest++;
+    }
+  }
+}
+
+static inline void
+gst_kate_tiger_blit_AYUV (GstKateTiger * tiger,
+    guint8 * rgb_pixels, gint xpos, gint ypos, const guint8 * image,
+    gint image_width, gint image_height)
+{
+  int a, r, g, b, a1;
+  int y, u, v;
+  int i, j;
+  int h, w;
+  const guint8 *pimage;
+  guchar *dest;
+
+  w = image_width;
+  h = image_height;
+
+  if (xpos < 0) {
+    xpos = 0;
+  }
+
+  if (xpos + w > tiger->video_width) {
+    w = tiger->video_width - xpos;
+  }
+
+  if (ypos + h > tiger->video_height) {
+    h = tiger->video_height - ypos;
+  }
+
+  for (i = 0; i < h; i++) {
+    pimage = image + i * image_width * 4;
+    dest = rgb_pixels + (i + ypos) * 4 * tiger->video_width + xpos * 4;
+    for (j = 0; j < w; j++) {
+      a = pimage[TIGER_ARGB_A];
+      b = pimage[TIGER_ARGB_B];
+      g = pimage[TIGER_ARGB_G];
+      r = pimage[TIGER_ARGB_R];
+
+      TIGER_UNPREMULTIPLY (a, r, g, b);
+
+      // convert background to yuv
+      COMP_Y (y, r, g, b);
+      COMP_U (u, r, g, b);
+      COMP_V (v, r, g, b);
+
+      // preform text "OVER" background alpha compositing
+      a1 = a + (dest[0] * (255 - a)) / 255 + 1; // add 1 to prevent divide by 0
+      OVER (dest[1], a, y, dest[0], dest[1], a1);
+      OVER (dest[2], a, u, dest[0], dest[2], a1);
+      OVER (dest[3], a, v, dest[0], dest[3], a1);
+      dest[0] = a1 - 1; // remove the temporary 1 we added
+
+      pimage += 4;
+      dest += 4;
+    }
+  }
+}
+
+static void
+gst_kate_tiger_blend_yuv (GstKateTiger * tiger, GstBuffer * video_frame,
+    const guint8 * image, gint image_width, gint image_height)
+{
+  gint xpos = 0, ypos = 0;
+  gint width, height;
+
+  width = image_width;
+  height = image_height;
+
+  switch (tiger->video_format) {
+    case GST_VIDEO_FORMAT_I420:
+    case GST_VIDEO_FORMAT_YV12:
+      gst_kate_tiger_blit_I420_YV12 (tiger,
+          GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+          image_height);
+      break;
+    case GST_VIDEO_FORMAT_NV12:
+    case GST_VIDEO_FORMAT_NV21:
+      gst_kate_tiger_blit_NV12_NV21 (tiger,
+          GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+          image_height);
+      break;
+    case GST_VIDEO_FORMAT_UYVY:
+      gst_kate_tiger_blit_UYVY (tiger,
+          GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+          image_height);
+      break;
+    case GST_VIDEO_FORMAT_AYUV:
+      gst_kate_tiger_blit_AYUV (tiger,
+          GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+          image_height);
+      break;
+    default:
+      g_assert_not_reached ();
+  }
+}
+
 static GstFlowReturn
 gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
 {
@@ -757,8 +1212,9 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
     g_cond_broadcast (tiger->cond);
   }
 
-  /* Update first with a dummy buffer pointer we cannot write to. If there is nothing
-     to draw, we will not have to make it writeable */
+  /* Update first with a dummy buffer pointer we cannot write to, but with the
+     right dimensions. If there is nothing to draw, we will not have to make
+     it writeable. */
   ptr = GST_BUFFER_DATA (buf);
 
   ret = tiger_renderer_set_buffer (tiger->tr, ptr, tiger->video_width,
@@ -791,7 +1247,19 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
     }
 
     /* and setup that buffer before rendering */
-    ptr = GST_BUFFER_DATA (buf);
+    if (gst_video_format_is_yuv (tiger->video_format)) {
+      guint8 *tmp = g_realloc (tiger->render_buffer,
+          tiger->video_width * tiger->video_height * 4);
+      if (!tmp) {
+        GST_WARNING_OBJECT (tiger, "Failed to allocate render buffer");
+        goto pass;
+      }
+      tiger->render_buffer = tmp;
+      ptr = tiger->render_buffer;
+      tiger_renderer_set_surface_clear_color (tiger->tr, 1, 0.0, 0.0, 0.0, 0.0);
+    } else {
+      ptr = GST_BUFFER_DATA (buf);
+    }
     ret = tiger_renderer_set_buffer (tiger->tr, ptr, tiger->video_width,
         tiger->video_height, tiger->video_width * 4, tiger->swap_rgb);
 
@@ -808,6 +1276,11 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
     GST_LOG_OBJECT (tiger, "Tiger renderer rendered on video frame at %f", t);
   }
 
+  if (gst_video_format_is_yuv (tiger->video_format)) {
+    gst_kate_tiger_blend_yuv (tiger, buf, tiger->render_buffer,
+        tiger->video_width, tiger->video_height);
+  }
+
 pass:
   GST_KATE_TIGER_MUTEX_UNLOCK (tiger);
 
diff --git a/ext/kate/gstkatetiger.h b/ext/kate/gstkatetiger.h
index 0947da4cf..f966cbf15 100644
--- a/ext/kate/gstkatetiger.h
+++ b/ext/kate/gstkatetiger.h
@@ -49,6 +49,7 @@
 #include <kate/kate.h>
 #include <tiger/tiger.h>
 #include <gst/gst.h>
+#include <gst/video/video.h>
 #include "gstkateutil.h"
 
 G_BEGIN_DECLS
@@ -90,9 +91,11 @@ struct _GstKateTiger
   guchar default_background_a;
 
   gboolean silent;
+  GstVideoFormat video_format;
   gint video_width;
   gint video_height;
   gboolean swap_rgb;
+  guint8 *render_buffer;
 
   GMutex *mutex;
   GCond *cond;
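A note on the TIGER_UNPREMULTIPLY step used by all the blit routines above: the ARGB pixels in the render buffer are treated as premultiplied by alpha, so each colour channel is divided back out before the YUV conversion and blend. A minimal standalone equivalent of that macro, for illustration only (unpremultiply_channel is a hypothetical name, not part of the patch):

#include <glib.h>

/* Undo alpha premultiplication for one 8-bit channel. A premultiplied
 * channel stores roughly c * a / 255, so the original value is recovered
 * as c * 255 / a, with a / 2 added for rounding and the result clamped.
 * Example: a = 128, premultiplied value 64 -> (64 * 255 + 64) / 128 = 128. */
static guchar
unpremultiply_channel (guchar c, guchar a)
{
  return (a > 0) ? MIN ((c * 255 + a / 2) / a, 255) : 0;
}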