author     Vincent Penquerc'h <ogg.k.ogg.k@googlemail.com>          2011-09-29 22:43:30 +0100
committer  Vincent Penquerc'h <vincent.penquerch@collabora.co.uk>   2011-11-28 15:28:39 +0000
commit     9eb79984a88d7eac96806c8b9999f16fa90c80a6 (patch)
tree       664eed9f3a853fdcfc3efed987f50d1039fc4956 /ext/kate
parent     4735a7554b7aebecc6f7469265240b9a4ba4a4ba (diff)
download   gstreamer-plugins-bad-9eb79984a88d7eac96806c8b9999f16fa90c80a6.tar.gz
kate: support for rendering on several YUV formats
This speeds up rendering a fair bit by not requiring colorspace conversion,
whether there is anything to overlay or not.

The blending code was nicked from textoverlay. I would think this might be
a helpful thing to put in, say, libgstvideo at some point.

https://bugzilla.gnome.org/show_bug.cgi?id=660528
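
The fixed-point constants added below are the BT.601 coefficients scaled by 2^16
(19595/65536 ~ 0.299, 38470/65536 ~ 0.587, 7471/65536 ~ 0.114), the same ones
textoverlay uses. For reference, a minimal standalone sketch of the luma
conversion plus alpha blend that the COMP_Y and BLEND macros below perform; the
helper name is illustrative and not part of the patch:

#include <glib.h>

/* Blend one un-premultiplied overlay pixel (r, g, b, alpha) onto a
 * background luma sample, using the same 16.16 fixed-point BT.601
 * weights and linear blend as the COMP_Y / BLEND macros below. */
static guint8
blend_overlay_luma (guint8 bg_y, guint r, guint g, guint b, guint alpha)
{
  gint y = ((19595 * r) >> 16) + ((38470 * g) >> 16) + ((7471 * b) >> 16);

  y = CLAMP (y, 0, 255);
  return (guint8) ((y * alpha + bg_y * (255 - alpha)) / 255);
}

For example, a half-transparent white overlay pixel (alpha 128) over a
background Y of 16 comes out as 135 with this arithmetic.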
Diffstat (limited to 'ext/kate')
-rw-r--r--  ext/kate/gstkatetiger.c  487
-rw-r--r--  ext/kate/gstkatetiger.h    3
2 files changed, 483 insertions, 7 deletions
diff --git a/ext/kate/gstkatetiger.c b/ext/kate/gstkatetiger.c
index 39821c336..8cdd7a7c0 100644
--- a/ext/kate/gstkatetiger.c
+++ b/ext/kate/gstkatetiger.c
@@ -3,6 +3,7 @@
* Copyright 2005 Thomas Vander Stichele <thomas@apestaart.org>
* Copyright 2005 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
* Copyright 2008 Vincent Penquerc'h <ogg.k.ogg.k@googlemail.com>
+ * Copyright (C) <2009> Young-Ho Cha <ganadist@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -125,6 +126,57 @@ enum
ARG_SILENT
};
+/* RGB -> YUV blitting routines taken from textoverlay,
+ original code from Young-Ho Cha <ganadist@gmail.com> */
+
+#define COMP_Y(ret, r, g, b) \
+{ \
+ ret = (int) (((19595 * r) >> 16) + ((38470 * g) >> 16) + ((7471 * b) >> 16)); \
+ ret = CLAMP (ret, 0, 255); \
+}
+
+#define COMP_U(ret, r, g, b) \
+{ \
+ ret = (int) (-((11059 * r) >> 16) - ((21709 * g) >> 16) + ((32768 * b) >> 16) + 128); \
+ ret = CLAMP (ret, 0, 255); \
+}
+
+#define COMP_V(ret, r, g, b) \
+{ \
+ ret = (int) (((32768 * r) >> 16) - ((27439 * g) >> 16) - ((5329 * b) >> 16) + 128); \
+ ret = CLAMP (ret, 0, 255); \
+}
+
+#define BLEND(ret, alpha, v0, v1) \
+{ \
+ ret = (v0 * alpha + v1 * (255 - alpha)) / 255; \
+}
+
+#define OVER(ret, alphaA, Ca, alphaB, Cb, alphaNew) \
+{ \
+ gint _tmp; \
+ _tmp = (Ca * alphaA + Cb * alphaB * (255 - alphaA) / 255) / alphaNew; \
+ ret = CLAMP (_tmp, 0, 255); \
+}
+
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+# define TIGER_ARGB_A 3
+# define TIGER_ARGB_R 2
+# define TIGER_ARGB_G 1
+# define TIGER_ARGB_B 0
+#else
+# define TIGER_ARGB_A 0
+# define TIGER_ARGB_R 1
+# define TIGER_ARGB_G 2
+# define TIGER_ARGB_B 3
+#endif
+
+#define TIGER_UNPREMULTIPLY(a,r,g,b) G_STMT_START { \
+ b = (a > 0) ? MIN ((b * 255 + a / 2) / a, 255) : 0; \
+ g = (a > 0) ? MIN ((g * 255 + a / 2) / a, 255) : 0; \
+ r = (a > 0) ? MIN ((r * 255 + a / 2) / a, 255) : 0; \
+} G_STMT_END
+
static GstStaticPadTemplate kate_sink_factory =
GST_STATIC_PAD_TEMPLATE ("subtitle_sink",
GST_PAD_SINK,
@@ -134,12 +186,12 @@ static GstStaticPadTemplate kate_sink_factory =
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define TIGER_VIDEO_CAPS \
- GST_VIDEO_CAPS_xRGB ", endianness = (int)1234; " \
- GST_VIDEO_CAPS_BGRx ", endianness = (int)4321"
+ GST_VIDEO_CAPS_xRGB ";" GST_VIDEO_CAPS_BGRx ";" \
+ GST_VIDEO_CAPS_YUV ("{AYUV, I420, YV12, UYVY, NV12, NV21}")
#else
#define TIGER_VIDEO_CAPS \
- GST_VIDEO_CAPS_BGRx ", endianness = (int)4321; " \
- GST_VIDEO_CAPS_xRGB ", endianness = (int)1234"
+ GST_VIDEO_CAPS_BGRx ";" GST_VIDEO_CAPS_xRGB ";" \
+ GST_VIDEO_CAPS_YUV ("{AYUV, I420, YV12, UYVY, NV12, NV21}")
#endif
static GstStaticPadTemplate video_sink_factory =
@@ -379,6 +431,9 @@ gst_kate_tiger_dispose (GObject * object)
tiger->default_font_desc = NULL;
}
+ g_free (tiger->render_buffer);
+ tiger->render_buffer = NULL;
+
g_cond_free (tiger->cond);
tiger->cond = NULL;
@@ -707,6 +762,7 @@ gst_kate_tiger_video_set_caps (GstPad * pad, GstCaps * caps)
tiger->swap_rgb = FALSE;
if (gst_video_format_parse_caps (caps, &format, &w, &h)) {
+ tiger->video_format = format;
tiger->video_width = w;
tiger->video_height = h;
}
@@ -731,6 +787,405 @@ gst_kate_tiger_get_time (GstKateTiger * tiger)
return pos / (gdouble) GST_SECOND;
}
+static inline void
+gst_kate_tiger_blit_1 (GstKateTiger * tiger, guchar * dest, gint xpos,
+ gint ypos, const guint8 * image, gint image_width, gint image_height,
+ guint dest_stride)
+{
+ gint i, j = 0;
+ gint x, y;
+ guchar r, g, b, a;
+ const guint8 *pimage;
+ guchar *py;
+ gint width = image_width;
+ gint height = image_height;
+
+ if (xpos < 0) {
+ xpos = 0;
+ }
+
+ if (xpos + width > tiger->video_width) {
+ width = tiger->video_width - xpos;
+ }
+
+ if (ypos + height > tiger->video_height) {
+ height = tiger->video_height - ypos;
+ }
+
+ dest += (ypos / 1) * dest_stride;
+
+ for (i = 0; i < height; i++) {
+ pimage = image + 4 * (i * image_width);
+ py = dest + i * dest_stride + xpos;
+ for (j = 0; j < width; j++) {
+ b = pimage[TIGER_ARGB_B];
+ g = pimage[TIGER_ARGB_G];
+ r = pimage[TIGER_ARGB_R];
+ a = pimage[TIGER_ARGB_A];
+ TIGER_UNPREMULTIPLY (a, r, g, b);
+
+ pimage += 4;
+ if (a == 0) {
+ py++;
+ continue;
+ }
+ COMP_Y (y, r, g, b);
+ x = *py;
+ BLEND (*py++, a, y, x);
+ }
+ }
+}
+
+static inline void
+gst_kate_tiger_blit_sub2x2cbcr (GstKateTiger * tiger,
+ guchar * destcb, guchar * destcr, gint xpos, gint ypos,
+ const guint8 * image, gint image_width, gint image_height,
+ guint destcb_stride, guint destcr_stride, guint pix_stride)
+{
+ gint i, j;
+ gint x, cb, cr;
+ gushort r, g, b, a;
+ gushort r1, g1, b1, a1;
+ const guint8 *pimage1, *pimage2;
+ guchar *pcb, *pcr;
+ gint width = image_width - 2;
+ gint height = image_height - 2;
+
+ xpos *= pix_stride;
+
+ if (xpos < 0) {
+ xpos = 0;
+ }
+
+ if (xpos + width > tiger->video_width) {
+ width = tiger->video_width - xpos;
+ }
+
+ if (ypos + height > tiger->video_height) {
+ height = tiger->video_height - ypos;
+ }
+
+ destcb += (ypos / 2) * destcb_stride;
+ destcr += (ypos / 2) * destcr_stride;
+
+ for (i = 0; i < height; i += 2) {
+ pimage1 = image + 4 * (i * image_width);
+ pimage2 = pimage1 + 4 * image_width;
+ pcb = destcb + (i / 2) * destcb_stride + xpos / 2;
+ pcr = destcr + (i / 2) * destcr_stride + xpos / 2;
+ for (j = 0; j < width; j += 2) {
+ b = pimage1[TIGER_ARGB_B];
+ g = pimage1[TIGER_ARGB_G];
+ r = pimage1[TIGER_ARGB_R];
+ a = pimage1[TIGER_ARGB_A];
+ TIGER_UNPREMULTIPLY (a, r, g, b);
+ pimage1 += 4;
+
+ b1 = pimage1[TIGER_ARGB_B];
+ g1 = pimage1[TIGER_ARGB_G];
+ r1 = pimage1[TIGER_ARGB_R];
+ a1 = pimage1[TIGER_ARGB_A];
+ TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+ b += b1;
+ g += g1;
+ r += r1;
+ a += a1;
+ pimage1 += 4;
+
+ b1 = pimage2[TIGER_ARGB_B];
+ g1 = pimage2[TIGER_ARGB_G];
+ r1 = pimage2[TIGER_ARGB_R];
+ a1 = pimage2[TIGER_ARGB_A];
+ TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+ b += b1;
+ g += g1;
+ r += r1;
+ a += a1;
+ pimage2 += 4;
+
+ /* + 2 for rounding */
+ b1 = pimage2[TIGER_ARGB_B];
+ g1 = pimage2[TIGER_ARGB_G];
+ r1 = pimage2[TIGER_ARGB_R];
+ a1 = pimage2[TIGER_ARGB_A];
+ TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+ b += b1 + 2;
+ g += g1 + 2;
+ r += r1 + 2;
+ a += a1 + 2;
+ pimage2 += 4;
+
+ b /= 4;
+ g /= 4;
+ r /= 4;
+ a /= 4;
+
+ if (a == 0) {
+ pcb += pix_stride;
+ pcr += pix_stride;
+ continue;
+ }
+ COMP_U (cb, r, g, b);
+ COMP_V (cr, r, g, b);
+
+ x = *pcb;
+ BLEND (*pcb, a, cb, x);
+ x = *pcr;
+ BLEND (*pcr, a, cr, x);
+
+ pcb += pix_stride;
+ pcr += pix_stride;
+ }
+ }
+}
+
+/* FIXME:
+ * - use proper strides and offset for I420
+ */
+
+static inline void
+gst_kate_tiger_blit_NV12_NV21 (GstKateTiger * tiger,
+ guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
+ gint image_width, gint image_height)
+{
+ int y_stride, uv_stride;
+ int u_offset, v_offset;
+ int h, w;
+
+ /* because U/V is 2x2 subsampled, we need to round, either up or down,
+ * to a boundary of integer number of U/V pixels:
+ */
+ xpos = GST_ROUND_UP_2 (xpos);
+ ypos = GST_ROUND_UP_2 (ypos);
+
+ w = tiger->video_width;
+ h = tiger->video_height;
+
+ y_stride = gst_video_format_get_row_stride (tiger->video_format, 0, w);
+ uv_stride = gst_video_format_get_row_stride (tiger->video_format, 1, w);
+ u_offset =
+ gst_video_format_get_component_offset (tiger->video_format, 1, w, h);
+ v_offset =
+ gst_video_format_get_component_offset (tiger->video_format, 2, w, h);
+
+ gst_kate_tiger_blit_1 (tiger, yuv_pixels, xpos, ypos, image, image_width,
+ image_height, y_stride);
+ gst_kate_tiger_blit_sub2x2cbcr (tiger, yuv_pixels + u_offset,
+ yuv_pixels + v_offset, xpos, ypos, image, image_width, image_height,
+ uv_stride, uv_stride, 2);
+}
+
+static inline void
+gst_kate_tiger_blit_I420_YV12 (GstKateTiger * tiger,
+ guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
+ gint image_width, gint image_height)
+{
+ int y_stride, u_stride, v_stride;
+ int u_offset, v_offset;
+ int h, w;
+
+ /* because U/V is 2x2 subsampled, we need to round, either up or down,
+ * to a boundary of integer number of U/V pixels:
+ */
+ xpos = GST_ROUND_UP_2 (xpos);
+ ypos = GST_ROUND_UP_2 (ypos);
+
+ w = tiger->video_width;
+ h = tiger->video_height;
+
+ y_stride = gst_video_format_get_row_stride (tiger->video_format, 0, w);
+ u_stride = gst_video_format_get_row_stride (tiger->video_format, 1, w);
+ v_stride = gst_video_format_get_row_stride (tiger->video_format, 2, w);
+ u_offset =
+ gst_video_format_get_component_offset (tiger->video_format, 1, w, h);
+ v_offset =
+ gst_video_format_get_component_offset (tiger->video_format, 2, w, h);
+
+ gst_kate_tiger_blit_1 (tiger, yuv_pixels, xpos, ypos, image, image_width,
+ image_height, y_stride);
+ gst_kate_tiger_blit_sub2x2cbcr (tiger, yuv_pixels + u_offset,
+ yuv_pixels + v_offset, xpos, ypos, image, image_width, image_height,
+ u_stride, v_stride, 1);
+}
+
+static inline void
+gst_kate_tiger_blit_UYVY (GstKateTiger * tiger,
+ guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
+ gint image_width, gint image_height)
+{
+ int a0, r0, g0, b0;
+ int a1, r1, g1, b1;
+ int y0, y1, u, v;
+ int i, j;
+ int h, w;
+ const guint8 *pimage;
+ guchar *dest;
+
+ /* because U/V is 2x horizontally subsampled, we need to round to a
+ * boundary of integer number of U/V pixels in x dimension:
+ */
+ xpos = GST_ROUND_UP_2 (xpos);
+
+ w = image_width - 2;
+ h = image_height - 2;
+
+ if (xpos < 0) {
+ xpos = 0;
+ }
+
+ if (xpos + w > tiger->video_width) {
+ w = tiger->video_width - xpos;
+ }
+
+ if (ypos + h > tiger->video_height) {
+ h = tiger->video_height - ypos;
+ }
+
+ for (i = 0; i < h; i++) {
+ pimage = image + i * image_width * 4;
+ dest = yuv_pixels + (i + ypos) * tiger->video_width * 2 + xpos * 2;
+ for (j = 0; j < w; j += 2) {
+ b0 = pimage[TIGER_ARGB_B];
+ g0 = pimage[TIGER_ARGB_G];
+ r0 = pimage[TIGER_ARGB_R];
+ a0 = pimage[TIGER_ARGB_A];
+ TIGER_UNPREMULTIPLY (a0, r0, g0, b0);
+ pimage += 4;
+
+ b1 = pimage[TIGER_ARGB_B];
+ g1 = pimage[TIGER_ARGB_G];
+ r1 = pimage[TIGER_ARGB_R];
+ a1 = pimage[TIGER_ARGB_A];
+ TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
+ pimage += 4;
+
+ a0 += a1 + 2;
+ a0 /= 2;
+ if (a0 == 0) {
+ dest += 4;
+ continue;
+ }
+
+ COMP_Y (y0, r0, g0, b0);
+ COMP_Y (y1, r1, g1, b1);
+
+ b0 += b1 + 2;
+ g0 += g1 + 2;
+ r0 += r1 + 2;
+
+ b0 /= 2;
+ g0 /= 2;
+ r0 /= 2;
+
+ COMP_U (u, r0, g0, b0);
+ COMP_V (v, r0, g0, b0);
+
+ BLEND (*dest, a0, u, *dest);
+ dest++;
+ BLEND (*dest, a0, y0, *dest);
+ dest++;
+ BLEND (*dest, a0, v, *dest);
+ dest++;
+ BLEND (*dest, a0, y1, *dest);
+ dest++;
+ }
+ }
+}
+
+static inline void
+gst_kate_tiger_blit_AYUV (GstKateTiger * tiger,
+ guint8 * rgb_pixels, gint xpos, gint ypos, const guint8 * image,
+ gint image_width, gint image_height)
+{
+ int a, r, g, b, a1;
+ int y, u, v;
+ int i, j;
+ int h, w;
+ const guint8 *pimage;
+ guchar *dest;
+
+ w = image_width;
+ h = image_height;
+
+ if (xpos < 0) {
+ xpos = 0;
+ }
+
+ if (xpos + w > tiger->video_width) {
+ w = tiger->video_width - xpos;
+ }
+
+ if (ypos + h > tiger->video_height) {
+ h = tiger->video_height - ypos;
+ }
+
+ for (i = 0; i < h; i++) {
+ pimage = image + i * image_width * 4;
+ dest = rgb_pixels + (i + ypos) * 4 * tiger->video_width + xpos * 4;
+ for (j = 0; j < w; j++) {
+ a = pimage[TIGER_ARGB_A];
+ b = pimage[TIGER_ARGB_B];
+ g = pimage[TIGER_ARGB_G];
+ r = pimage[TIGER_ARGB_R];
+
+ TIGER_UNPREMULTIPLY (a, r, g, b);
+
+ // convert background to yuv
+ COMP_Y (y, r, g, b);
+ COMP_U (u, r, g, b);
+ COMP_V (v, r, g, b);
+
+ // preform text "OVER" background alpha compositing
+ a1 = a + (dest[0] * (255 - a)) / 255 + 1; // add 1 to prevent divide by 0
+ OVER (dest[1], a, y, dest[0], dest[1], a1);
+ OVER (dest[2], a, u, dest[0], dest[2], a1);
+ OVER (dest[3], a, v, dest[0], dest[3], a1);
+ dest[0] = a1 - 1; // remove the temporary 1 we added
+
+ pimage += 4;
+ dest += 4;
+ }
+ }
+}
+
+static void
+gst_kate_tiger_blend_yuv (GstKateTiger * tiger, GstBuffer * video_frame,
+ const guint8 * image, gint image_width, gint image_height)
+{
+ gint xpos = 0, ypos = 0;
+ gint width, height;
+
+ width = image_width;
+ height = image_height;
+
+ switch (tiger->video_format) {
+ case GST_VIDEO_FORMAT_I420:
+ case GST_VIDEO_FORMAT_YV12:
+ gst_kate_tiger_blit_I420_YV12 (tiger,
+ GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+ image_height);
+ break;
+ case GST_VIDEO_FORMAT_NV12:
+ case GST_VIDEO_FORMAT_NV21:
+ gst_kate_tiger_blit_NV12_NV21 (tiger,
+ GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+ image_height);
+ break;
+ case GST_VIDEO_FORMAT_UYVY:
+ gst_kate_tiger_blit_UYVY (tiger,
+ GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+ image_height);
+ break;
+ case GST_VIDEO_FORMAT_AYUV:
+ gst_kate_tiger_blit_AYUV (tiger,
+ GST_BUFFER_DATA (video_frame), xpos, ypos, image, image_width,
+ image_height);
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+}
+
static GstFlowReturn
gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
{
@@ -757,8 +1212,9 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
g_cond_broadcast (tiger->cond);
}
- /* Update first with a dummy buffer pointer we cannot write to. If there is nothing
- to draw, we will not have to make it writeable */
+ /* Update first with a dummy buffer pointer we cannot write to, but with the
+ right dimensions. If there is nothing to draw, we will not have to make
+ it writeable. */
ptr = GST_BUFFER_DATA (buf);
ret =
tiger_renderer_set_buffer (tiger->tr, ptr, tiger->video_width,
@@ -791,7 +1247,19 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
}
/* and setup that buffer before rendering */
- ptr = GST_BUFFER_DATA (buf);
+ if (gst_video_format_is_yuv (tiger->video_format)) {
+ guint8 *tmp = g_realloc (tiger->render_buffer,
+ tiger->video_width * tiger->video_height * 4);
+ if (!tmp) {
+ GST_WARNING_OBJECT (tiger, "Failed to allocate render buffer");
+ goto pass;
+ }
+ tiger->render_buffer = tmp;
+ ptr = tiger->render_buffer;
+ tiger_renderer_set_surface_clear_color (tiger->tr, 1, 0.0, 0.0, 0.0, 0.0);
+ } else {
+ ptr = GST_BUFFER_DATA (buf);
+ }
ret =
tiger_renderer_set_buffer (tiger->tr, ptr, tiger->video_width,
tiger->video_height, tiger->video_width * 4, tiger->swap_rgb);
@@ -808,6 +1276,11 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
GST_LOG_OBJECT (tiger, "Tiger renderer rendered on video frame at %f", t);
}
+ if (gst_video_format_is_yuv (tiger->video_format)) {
+ gst_kate_tiger_blend_yuv (tiger, buf, tiger->render_buffer,
+ tiger->video_width, tiger->video_height);
+ }
+
pass:
GST_KATE_TIGER_MUTEX_UNLOCK (tiger);
diff --git a/ext/kate/gstkatetiger.h b/ext/kate/gstkatetiger.h
index 0947da4cf..f966cbf15 100644
--- a/ext/kate/gstkatetiger.h
+++ b/ext/kate/gstkatetiger.h
@@ -49,6 +49,7 @@
#include <kate/kate.h>
#include <tiger/tiger.h>
#include <gst/gst.h>
+#include <gst/video/video.h>
#include "gstkateutil.h"
G_BEGIN_DECLS
@@ -90,9 +91,11 @@ struct _GstKateTiger
guchar default_background_a;
gboolean silent;
+ GstVideoFormat video_format;
gint video_width;
gint video_height;
gboolean swap_rgb;
+ guint8 *render_buffer;
GMutex *mutex;
GCond *cond;
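
A note on the AYUV path above: unlike the planar and packed formats, which use
a plain alpha blend onto an opaque background, AYUV also carries destination
alpha, so the patch composites with a Porter-Duff "over" (the OVER macro),
temporarily adding 1 to the new alpha to avoid a division by zero. A
standalone sketch of that per-pixel step, assuming glib for CLAMP; the helper
name is illustrative and not part of the patch:

#include <glib.h>

/* Composite one un-premultiplied overlay pixel, already converted to
 * YUV, over a destination AYUV pixel dest[0..3] = {A, Y, U, V}. */
static void
ayuv_pixel_over (guint8 dest[4], guint8 a, guint8 y, guint8 u, guint8 v)
{
  gint a_new, out;

  /* +1 keeps the divisions below from ever being by zero; it is
   * subtracted again when the new alpha is stored, as in the patch */
  a_new = a + (dest[0] * (255 - a)) / 255 + 1;

  out = (y * a + dest[1] * dest[0] * (255 - a) / 255) / a_new;
  dest[1] = CLAMP (out, 0, 255);
  out = (u * a + dest[2] * dest[0] * (255 - a) / 255) / a_new;
  dest[2] = CLAMP (out, 0, 255);
  out = (v * a + dest[3] * dest[0] * (255 - a) / 255) / a_new;
  dest[3] = CLAMP (out, 0, 255);

  dest[0] = a_new - 1;
}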