dvbsubenc: Add DVB Subtitle encoder

Add an element that converts AYUV video frames to a DVB subpicture stream. It is fairly simple for now; later it would be good to support input via a stream that contains only GstVideoOverlayComposition meta. The element searches each input video frame for the largest sub-region containing non-transparent pixels and encodes that as a single DVB subpicture region. It can also do palette reduction of the input frames using code taken from libimagequant. There are still various FIXMEs marking potential improvements, but it works. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/1227>
author: Jan Schmidt <jan@centricular.com> 2020-04-07 21:47:22 +1000
committer: Jan Schmidt <jan@centricular.com> 2020-06-17 12:50:13 +1000
commit: 1cf3cae5e1e35c2e8eb8a919db77f2970e743676 (patch)
tree: fc0411d02fc3ad55378912f49b85fb6406db99f0 /gst/dvbsubenc
parent: f899728dd45bbf154123bca3c1c8c9e2b6af5c24 (diff)
download: gstreamer-plugins-bad-1cf3cae5e1e35c2e8eb8a919db77f2970e743676.tar.gz
21 files changed, 6164 insertions, 0 deletions
diff --git a/gst/dvbsubenc/gstdvbsubenc-util.c b/gst/dvbsubenc/gstdvbsubenc-util.c
new file mode 100644
index 000000000..c9a152b19
--- /dev/null
+++ b/gst/dvbsubenc/gstdvbsubenc-util.c
@@ -0,0 +1,802 @@
+/* GStreamer
+ * Copyright (C) <2020> Jan Schmidt <jan@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+
+//#define HACK_2BIT /* Force 2-bit output by discarding colours */
+//#define HACK_4BIT /* Force 4-bit output by discarding colours */
+
+#include "gstdvbsubenc.h"
+#include <gst/base/gstbytewriter.h>
+#include <gst/base/gstbitwriter.h>
+
+#include "libimagequant/libimagequant.h"
+
+/* First byte of every segment written by this file */
+#define DVB_SEGMENT_SYNC_BYTE 0xF
+
+/* Segment type codes. One of these is written as the byte immediately
+ * following DVB_SEGMENT_SYNC_BYTE at the start of each segment. */
+enum DVBSegmentType
+{
+  DVB_SEGMENT_TYPE_PAGE_COMPOSITION = 0x10,
+  DVB_SEGMENT_TYPE_REGION_COMPOSITION = 0x11,
+  DVB_SEGMENT_TYPE_CLUT_DEFINITION = 0x12,
+  DVB_SEGMENT_TYPE_OBJECT_DATA = 0x13,
+  /* Not emitted by any of the writers in this file */
+  DVB_SEGMENT_TYPE_DISPLAY_DEFINITION = 0x14,
+
+  DVB_SEGMENT_TYPE_END_OF_DISPLAY = 0x80
+};
+
+/* Data type bytes used inside object data. The RLE encoders below write
+ * the matching *_2BIT / *_4BIT / *_8BIT value before each encoded line and
+ * DVB_PIXEL_DATA_TYPE_END_OF_LINE after it. */
+enum DVBPixelDataType
+{
+  DVB_PIXEL_DATA_TYPE_2BIT = 0x10,
+  DVB_PIXEL_DATA_TYPE_4BIT = 0x11,
+  DVB_PIXEL_DATA_TYPE_8BIT = 0x12,
+  DVB_PIXEL_DATA_TYPE_END_OF_LINE = 0xF0
+};
+
+/* One distinct colour found in the input image */
+struct HistogramEntry
+{
+  guint32 colour;               /* packed pixel value, read big-endian from the 4 source bytes */
+  guint32 count;                /* number of pixels that have this colour */
+  guint32 substitution;         /* NOTE(review): not read or written by the code visible in this file */
+};
+
+/* One input pixel, kept together with the place it must be written to so
+ * that the pixel array can be sorted by colour. */
+struct ColourEntry
+{
+  guint32 colour;               /* packed pixel value, read big-endian from the 4 source bytes */
+  guint32 pix_index;            /* byte offset of this pixel in plane 0 of the destination frame */
+};
+
+typedef struct HistogramEntry HistogramEntry;
+typedef struct ColourEntry ColourEntry;
+
+/* Three-way comparison of the two guint32 values that a and b point to.
+ * Returns -1, 0 or 1 when *a is less than, equal to or greater than *b. */
+static gint
+compare_uint32 (gconstpointer a, gconstpointer b)
+{
+  const guint32 lhs = *(guint32 *) (a);
+  const guint32 rhs = *(guint32 *) (b);
+
+  if (lhs == rhs)
+    return 0;
+
+  return (lhs < rhs) ? -1 : 1;
+}
+
+/* Sort callback for arrays of ColourEntry: orders entries by descending
+ * colour value. Alpha is the most significant byte of the packed colour,
+ * so the highest alpha values come first. */
+static gint
+compare_colour_entry_colour (gconstpointer a, gconstpointer b)
+{
+  const ColourEntry *first = (ColourEntry *) (a);
+  const ColourEntry *second = (ColourEntry *) (b);
+  const gint ascending = compare_uint32 (&first->colour, &second->colour);
+
+  /* compare_uint32() only returns -1, 0 or 1, so negating it reverses
+   * the ordering */
+  return -ascending;
+}
+
+/* Row callback handed to liq_image_create_custom(). Fills row_out with
+ * `width` pixels taken from row `row_index` of plane 0 of the
+ * GstVideoFrame passed as user_info. */
+static void
+image_get_rgba_row_callback (liq_color row_out[], int row_index, int width,
+    void *user_info)
+{
+  GstVideoFrame *frame = (GstVideoFrame *) (user_info);
+  const guint32 stride = GST_VIDEO_INFO_PLANE_STRIDE (&frame->info, 0);
+  guint8 *in = (guint8 *) (frame->data[0]) + (row_index * stride);
+  int x;
+
+  /* FIXME: The A, Y, U, V bytes are stored unconverted in the a, r, g, b
+   * fields. That works, but the quantiser probably makes suboptimal
+   * choices about which colours to preserve. It would be better to
+   * convert to RGBA and back again, or to modify libimagequant to
+   * handle ayuv */
+  for (x = 0; x < width; x++, in += 4) {
+    liq_color *out = &row_out[x];
+
+    out->a = in[0];
+    out->r = in[1];
+    out->g = in[2];
+    out->b = in[3];
+  }
+}
+
+/*
+ * Reduce @src to at most @max_colours colours using libimagequant.
+ *
+ * Writes one palette index per pixel into plane 0 of @dest and the
+ * resulting palette (4 bytes per entry, in the same byte order as the
+ * source pixels) into plane 1 of @dest. On success the number of palette
+ * entries is stored in @out_num_colours.
+ *
+ * Returns: TRUE on success, FALSE if any libimagequant step failed. On
+ * failure @out_num_colours is left untouched.
+ */
+static gboolean
+dvbsubenc_quantize_frame (GstVideoFrame * src, GstVideoFrame * dest,
+    int max_colours, guint * out_num_colours)
+{
+  const gint width = GST_VIDEO_INFO_WIDTH (&src->info);
+  const gint height = GST_VIDEO_INFO_HEIGHT (&src->info);
+  const guint32 dest_stride = GST_VIDEO_INFO_PLANE_STRIDE (&dest->info, 0);
+  guint8 *dest_palette = (guint8 *) (dest->data[1]);
+  unsigned char **dest_rows = NULL;
+  liq_attr *attr;
+  liq_image *image = NULL;
+  liq_result *res = NULL;
+  const liq_palette *pal;
+  gboolean ok = FALSE;
+  gint row;
+  guint n;
+
+  attr = liq_attr_create ();
+  if (attr == NULL)
+    goto out;
+
+  liq_set_max_colors (attr, max_colours);
+
+  image = liq_image_create_custom (attr, image_get_rgba_row_callback, src,
+      width, height, 0);
+  if (image == NULL)
+    goto out;
+
+  res = liq_quantize_image (attr, image);
+  if (res == NULL)
+    goto out;
+
+  /* The remapper wants one pointer per output row */
+  dest_rows = g_new (unsigned char *, height);
+  for (row = 0; row < height; row++)
+    dest_rows[row] = (guint8 *) (dest->data[0]) + row * dest_stride;
+
+  if (liq_write_remapped_image_rows (res, image, dest_rows) != LIQ_OK)
+    goto out;
+
+  pal = liq_get_palette (res);
+  if (pal == NULL)
+    goto out;
+
+  /* Write out the palette */
+  for (n = 0; n < pal->count; n++) {
+    const liq_color *col = pal->entries + n;
+    guint8 *entry = dest_palette + 4 * n;
+
+    entry[0] = col->a;
+    entry[1] = col->r;
+    entry[2] = col->g;
+    entry[3] = col->b;
+  }
+
+  *out_num_colours = pal->count;
+  ok = TRUE;
+
+out:
+  g_free (dest_rows);
+  if (attr != NULL)
+    liq_attr_destroy (attr);
+  if (image != NULL)
+    liq_image_destroy (image);
+  if (res != NULL)
+    liq_result_destroy (res);
+
+  return ok;
+}
+
+/*
+ * Convert an AYUV frame into an 8-bit paletted frame of the same size.
+ *
+ * @src: input frame; must be GST_VIDEO_FORMAT_AYUV
+ * @dest: output frame with the same width and height as @src. Plane 0
+ *   receives one palette index per pixel, plane 1 receives the palette
+ *   as 4 bytes (A, Y, U, V) per entry.
+ * @max_colours: maximum number of palette entries to produce
+ * @out_num_colours: if not NULL, receives the number of palette entries
+ *   that were written
+ *
+ * All pixels are copied into a temporary array and sorted by colour in
+ * order to count the distinct colours. If there are no more than
+ * @max_colours of them they are used directly as the palette, in
+ * descending order of packed AYUV value. Otherwise the image is reduced
+ * to @max_colours colours with libimagequant.
+ *
+ * Returns: TRUE on success. FALSE if @src is not AYUV, the frame sizes
+ * differ, the frame contains no pixels, or palette reduction failed.
+ *
+ * FIXME: Sorting a copy of every pixel is a simple but expensive way of
+ * counting colours.
+ */
+gboolean
+gst_dvbsubenc_ayuv_to_ayuv8p (GstVideoFrame * src, GstVideoFrame * dest,
+    int max_colours, guint32 * out_num_colours)
+{
+  gboolean ret = FALSE;
+
+  GArray *colours, *histogram;
+  gint i, num_pixels, dest_y_index, out_index;
+  guint num_colours, cur_count;
+  guint32 last;
+  guint8 *s;
+  HistogramEntry *h;
+  ColourEntry *c;
+  const gint width = GST_VIDEO_INFO_WIDTH (&src->info);
+  const gint height = GST_VIDEO_INFO_HEIGHT (&src->info);
+  const guint32 src_stride = GST_VIDEO_INFO_PLANE_STRIDE (&src->info, 0);
+  const guint32 dest_stride = GST_VIDEO_INFO_PLANE_STRIDE (&dest->info, 0);
+
+  if (GST_VIDEO_INFO_FORMAT (&src->info) != GST_VIDEO_FORMAT_AYUV)
+    return FALSE;
+
+  if (width != GST_VIDEO_INFO_WIDTH (&dest->info) ||
+      height != GST_VIDEO_INFO_HEIGHT (&dest->info))
+    return FALSE;
+
+  num_pixels = width * height;
+  /* The histogram pass below unconditionally reads the first pixel, so
+   * an empty frame cannot be processed */
+  if (num_pixels <= 0)
+    return FALSE;
+
+  s = (guint8 *) (src->data[0]);
+
+  colours = g_array_sized_new (FALSE, FALSE, sizeof (ColourEntry), num_pixels);
+  colours = g_array_set_size (colours, num_pixels);
+
+  histogram =
+      g_array_sized_new (FALSE, TRUE, sizeof (HistogramEntry), num_pixels);
+  histogram = g_array_set_size (histogram, num_pixels);
+
+  /* Copy the pixels to an array we can sort, dropping any stride padding,
+   * and recording the output index into the destination bitmap in the
+   * pix_index field */
+  dest_y_index = 0;
+  out_index = 0;
+  for (i = 0; i < height; i++) {
+    guint32 x_index;
+    gint x;
+
+    for (x = 0, x_index = 0; x < width; x++, x_index += 4) {
+      guint8 *pix = s + x_index;
+
+      c = &g_array_index (colours, ColourEntry, out_index);
+      c->colour = GST_READ_UINT32_BE (pix);
+      c->pix_index = dest_y_index + x;
+
+      out_index++;
+    }
+
+    s += src_stride;
+    dest_y_index += dest_stride;
+  }
+
+  /* Sort by colour, then collapse each run of equal colours into one
+   * histogram entry */
+  g_array_sort (colours, compare_colour_entry_colour);
+  c = &g_array_index (colours, ColourEntry, 0);
+  last = c->colour;
+  num_colours = 0;
+  cur_count = 1;
+  for (i = 1; i < num_pixels; i++) {
+    guint32 cur;
+
+    c = &g_array_index (colours, ColourEntry, i);
+    cur = c->colour;
+
+    if (cur == last) {
+      cur_count++;
+      continue;
+    }
+
+    /* Colour changed - add an entry to the histogram */
+    h = &g_array_index (histogram, HistogramEntry, num_colours);
+    h->colour = last;
+    h->count = cur_count;
+
+    num_colours++;
+    cur_count = 1;
+    last = cur;
+  }
+  /* Flush the final run */
+  h = &g_array_index (histogram, HistogramEntry, num_colours);
+  h->colour = last;
+  h->count = cur_count;
+  num_colours++;
+
+  GST_LOG ("image has %u colours", num_colours);
+  histogram = g_array_set_size (histogram, num_colours);
+
+  if (num_colours > (guint) max_colours) {
+    /* Too many colours for the requested palette size - quantise */
+    if (!dvbsubenc_quantize_frame (src, dest, max_colours, &num_colours)) {
+      GST_WARNING ("Failed to reduce image to %d colours", max_colours);
+      goto done;
+    }
+  } else {
+    guint8 *d = (guint8 *) (dest->data[0]);
+    guint8 *palette = (guint8 *) (dest->data[1]);
+    gint pal_index = 0;
+    guint n;
+
+    /* Write out the palette */
+    for (n = 0; n < num_colours; n++) {
+      h = &g_array_index (histogram, HistogramEntry, n);
+      GST_WRITE_UINT32_BE (palette + 4 * n, h->colour);
+    }
+
+    /* Write out the palette image. At this point, both the
+     * colours and histogram arrays are sorted in descending AYUV value,
+     * so walk them both and write out the current palette index */
+    for (i = 0; i < num_pixels; i++) {
+      c = &g_array_index (colours, ColourEntry, i);
+      h = &g_array_index (histogram, HistogramEntry, pal_index);
+
+      if (c->colour != h->colour) {
+        pal_index++;
+        h = &g_array_index (histogram, HistogramEntry, pal_index);
+        g_assert (h->colour == c->colour);      /* We must be walking colours in the same order in both arrays */
+      }
+      d[c->pix_index] = pal_index;
+    }
+  }
+
+  ret = TRUE;
+  if (out_num_colours)
+    *out_num_colours = num_colours;
+
+done:
+  g_array_free (colours, TRUE);
+  g_array_free (histogram, TRUE);
+
+  return ret;
+}
+
+/* Common signature of the encode_rle2/4/8 functions: append the
+ * run-length encoding of h lines of w palette-index pixels to b, where
+ * consecutive encoded lines start `stride` bytes apart in `pixels`. */
+typedef void (*EncodeRLEFunc) (GstByteWriter * b, const guint8 * pixels,
+    const gint stride, const gint w, const gint h);
+
+/*
+ * Append the 2-bit run-length encoding of an image to @b.
+ *
+ * @b: byte writer receiving the encoded data
+ * @pixels: first pixel of the first line to encode, one palette index
+ *   per byte. Only the low 2 bits of each index are written, so indices
+ *   are expected to be < 4 (unless HACK_2BIT is defined, in which case
+ *   the top 2 bits of each 8-bit index are used instead).
+ * @stride: distance in bytes between the starts of consecutive lines
+ * @w: number of pixels per line
+ * @h: number of lines to encode
+ *
+ * Each line is written as DVB_PIXEL_DATA_TYPE_2BIT, the byte-aligned
+ * pixel code string, then DVB_PIXEL_DATA_TYPE_END_OF_LINE.
+ */
+static void
+encode_rle2 (GstByteWriter * b, const guint8 * pixels,
+    const gint stride, const gint w, const gint h)
+{
+  GstBitWriter bits;
+
+  int y;
+
+  gst_bit_writer_init (&bits);
+
+  for (y = 0; y < h; y++) {
+    int x = 0;
+    guint size;
+
+    gst_byte_writer_put_uint8 (b, DVB_PIXEL_DATA_TYPE_2BIT);
+
+    while (x < w) {
+      int x_end = x;
+      int run_length;
+      guint8 pix;
+
+      /* Find the end of the run of identical pixels starting at x */
+      pix = pixels[x_end++];
+      while (x_end < w && pixels[x_end] == pix)
+        x_end++;
+
+#ifdef HACK_2BIT
+      pix >>= 6;                /* HACK to convert 8 bit to 2 bit palette */
+#endif
+
+      /* 284 is the largest run length we can encode. Anything left over
+       * is picked up by the next iteration of the loop. */
+      run_length = MIN (x_end - x, 284);
+
+      if (run_length >= 29) {
+        /* 0000 11 LLLLLLLL CC = run 29 to 284 pixels */
+        gst_bit_writer_put_bits_uint8 (&bits, 0x03, 6);
+        gst_bit_writer_put_bits_uint8 (&bits, run_length - 29, 8);
+        gst_bit_writer_put_bits_uint8 (&bits, pix, 2);
+      } else if (run_length >= 12 && run_length <= 27) {
+        /* 0000 10 LLLL CC = run 12 to 27 pixels */
+        gst_bit_writer_put_bits_uint8 (&bits, 0x02, 6);
+        gst_bit_writer_put_bits_uint8 (&bits, run_length - 12, 4);
+        gst_bit_writer_put_bits_uint8 (&bits, pix, 2);
+      } else if (run_length >= 3 && run_length <= 10) {
+        /* 00 1LLL CC = run 3 to 10 pixels */
+        gst_bit_writer_put_bits_uint8 (&bits, 0, 2);
+        gst_bit_writer_put_bits_uint8 (&bits, 0x8 + run_length - 3, 4);
+        gst_bit_writer_put_bits_uint8 (&bits, pix, 2);
+      }
+      /* Missed cases - 11 pixels, 28 pixels or a short length 1 or 2 pixels
+       * - write out a single pixel if != 0, or 1 or 2 pixels of black */
+      else if (pix != 0) {
+        gst_bit_writer_put_bits_uint8 (&bits, pix, 2);
+        run_length = 1;
+      } else if (run_length == 2) {
+        /* 0000 01 - 2 pixels colour 0 */
+        gst_bit_writer_put_bits_uint8 (&bits, 0x1, 6);
+        run_length = 2;
+      } else {
+        /* 0001 - single pixel colour 0 */
+        gst_bit_writer_put_bits_uint8 (&bits, 0x1, 4);
+        run_length = 1;
+      }
+
+      x += run_length;
+      GST_LOG ("%u pixels = colour %u", run_length, pix);
+    }
+
+    /* End of string marker. In the 2-bit code this is 00 0 0 00 - six
+     * zero bits, not a whole byte. Writing 8 bits here would produce a
+     * stray 0x00 byte after the string whenever it ends 2 bits past a
+     * byte boundary. */
+    gst_bit_writer_put_bits_uint8 (&bits, 0x00, 6);
+
+    /* pad with zero bits if needed to byte align, then
+     * write bit string to output */
+    gst_bit_writer_align_bytes (&bits, 0);
+    size = gst_bit_writer_get_size (&bits);
+
+    gst_byte_writer_put_data (b, gst_bit_writer_get_data (&bits), size / 8);
+
+    /* Start the next line with an empty bit writer */
+    gst_bit_writer_reset (&bits);
+    gst_bit_writer_init (&bits);
+
+    GST_LOG ("y %u 2-bit RLE string = %u bits", y, size);
+    gst_byte_writer_put_uint8 (b, DVB_PIXEL_DATA_TYPE_END_OF_LINE);
+    pixels += stride;
+  }
+}
+
+/*
+ * Append the 4-bit run-length encoding of an image to @b.
+ *
+ * @b: byte writer receiving the encoded data
+ * @pixels: first pixel of the first line to encode, one palette index
+ *   per byte. Only the low 4 bits of each index are written (unless
+ *   HACK_4BIT is defined, in which case the top 4 bits are used).
+ * @stride: distance in bytes between the starts of consecutive lines
+ * @w: number of pixels per line
+ * @h: number of lines to encode
+ *
+ * Each line is written as DVB_PIXEL_DATA_TYPE_4BIT, the byte-aligned
+ * pixel code string, then DVB_PIXEL_DATA_TYPE_END_OF_LINE.
+ */
+static void
+encode_rle4 (GstByteWriter * b, const guint8 * pixels,
+    const gint stride, const gint w, const gint h)
+{
+  GstBitWriter bits;
+
+  int y;
+
+  gst_bit_writer_init (&bits);
+
+  for (y = 0; y < h; y++) {
+    int x = 0;
+    guint size;
+
+    gst_byte_writer_put_uint8 (b, DVB_PIXEL_DATA_TYPE_4BIT);
+
+    while (x < w) {
+      int x_end = x;
+      int run_length;
+      guint8 pix;
+
+      /* Find the end of the run of identical pixels starting at x */
+      pix = pixels[x_end++];
+      while (x_end < w && pixels[x_end] == pix)
+        x_end++;
+
+      /* 280 is the largest run length we can encode. Whatever part of
+       * the run is not emitted below is picked up again by the next
+       * iteration, because x only advances by the emitted run_length. */
+      run_length = MIN (x_end - x, 280);
+
+      GST_LOG ("Encoding run %u pixels = colour %u", run_length, pix);
+
+#ifdef HACK_4BIT
+      pix >>= 4;                /* HACK to convert 8 bit to 4 palette */
+#endif
+
+      /* NOTE: the order of the branches below matters - each one only
+       * sees the cases that the earlier ones rejected */
+      if (pix == 0 && run_length >= 3 && run_length <= 9) {
+        /* 0000 0LLL = run 3 to 9 pixels of colour 0 */
+        gst_bit_writer_put_bits_uint8 (&bits, 0, 4);
+        gst_bit_writer_put_bits_uint8 (&bits, run_length - 2, 4);
+      } else if (run_length >= 4 && run_length < 25) {
+        /* 4 to 7 pixels encoding: 0000 10LL CCCC. Longer runs (up to 24)
+         * are emitted 7 pixels at a time */
+        if (run_length > 7)
+          run_length = 7;
+
+        gst_bit_writer_put_bits_uint8 (&bits, 0, 4);
+        gst_bit_writer_put_bits_uint8 (&bits, 0x8 + run_length - 4, 4);
+        gst_bit_writer_put_bits_uint8 (&bits, pix, 4);
+      } else if (run_length >= 25) {
+        /* Run length 25 to 280 pixels: 0000 1111 LLLLLLLL CCCC */
+        if (run_length > 280)
+          run_length = 280;
+
+        gst_bit_writer_put_bits_uint8 (&bits, 0x0f, 8);
+        gst_bit_writer_put_bits_uint8 (&bits, run_length - 25, 8);
+        gst_bit_writer_put_bits_uint8 (&bits, pix, 4);
+      }
+      /* Short length, 1, 2 or 3 pixels - write out a single pixel if != 0,
+       * or 1 or 2 pixels of black */
+      else if (pix != 0) {
+        gst_bit_writer_put_bits_uint8 (&bits, pix, 4);
+        run_length = 1;
+      } else if (run_length > 1) {
+        /* 0000 1101 - 2 pixels colour 0 */
+        gst_bit_writer_put_bits_uint8 (&bits, 0xd, 8);
+        run_length = 2;
+      } else {
+        /* 0000 1100 - single pixel colour 0 */
+        gst_bit_writer_put_bits_uint8 (&bits, 0xc, 8);
+        run_length = 1;
+      }
+      x += run_length;
+
+      GST_LOG ("Put %u pixels = colour %u", run_length, pix);
+    }
+
+    /* End of line 0x00 */
+    gst_bit_writer_put_bits_uint8 (&bits, 0x00, 8);
+
+    /* pad by 4 bits if needed to byte align, then
+     * write bit string to output */
+    gst_bit_writer_align_bytes (&bits, 0);
+    size = gst_bit_writer_get_size (&bits);
+
+    gst_byte_writer_put_data (b, gst_bit_writer_get_data (&bits), size / 8);
+
+    /* Start the next line with an empty bit writer */
+    gst_bit_writer_reset (&bits);
+    gst_bit_writer_init (&bits);
+
+    GST_LOG ("y %u 4-bit RLE string = %u bits", y, size);
+    gst_byte_writer_put_uint8 (b, DVB_PIXEL_DATA_TYPE_END_OF_LINE);
+    pixels += stride;
+  }
+}
+
+/*
+ * Append the 8-bit run-length encoding of an image to @b.
+ *
+ * @b: byte writer receiving the encoded data
+ * @pixels: first pixel of the first line to encode, one palette index
+ *   per byte
+ * @stride: distance in bytes between the starts of consecutive lines
+ * @w: number of pixels per line
+ * @h: number of lines to encode
+ *
+ * Each line is written as DVB_PIXEL_DATA_TYPE_8BIT, the pixel code
+ * string, then DVB_PIXEL_DATA_TYPE_END_OF_LINE.
+ */
+static void
+encode_rle8 (GstByteWriter * b, const guint8 * pixels,
+    const gint stride, const gint w, const gint h)
+{
+  const guint8 *line = pixels;
+  int row;
+
+  for (row = 0; row < h; row++, line += stride) {
+    int start = 0;
+
+    gst_byte_writer_put_uint8 (b, DVB_PIXEL_DATA_TYPE_8BIT);
+
+    while (start < w) {
+      const guint8 colour = line[start];
+      int next = start + 1;
+      int count;
+
+      /* Measure the run of identical pixels beginning at start */
+      while (next < w && line[next] == colour)
+        next++;
+
+      /* 127 is the largest run length we can encode; the remainder of a
+       * longer run is handled by the next pass through the loop */
+      count = MIN (next - start, 127);
+
+      if (colour == 0) {
+        /* 0x00 0LLLLLLL = 1 to 127 pixels of colour 0 */
+        gst_byte_writer_put_uint8 (b, 0);
+        gst_byte_writer_put_uint8 (b, count);
+      } else if (count >= 3) {
+        /* 0x00 1LLLLLLL CC = 3 to 127 pixels of a non-zero colour */
+        gst_byte_writer_put_uint8 (b, 0);
+        gst_byte_writer_put_uint8 (b, 0x80 | count);
+        gst_byte_writer_put_uint8 (b, colour);
+      } else {
+        /* 1 or 2 pixels of a non-zero colour are cheapest written as
+         * literal pixel bytes */
+        int k;
+
+        for (k = 0; k < count; k++)
+          gst_byte_writer_put_uint8 (b, colour);
+      }
+
+      start += count;
+    }
+
+    /* End of line bytes */
+    gst_byte_writer_put_uint8 (b, 0x00);
+    // This 2nd 0x00 byte is correct from the spec, but ffmpeg
+    // as of 2020-04-24 does not like it
+    gst_byte_writer_put_uint8 (b, 0x00);
+    gst_byte_writer_put_uint8 (b, DVB_PIXEL_DATA_TYPE_END_OF_LINE);
+  }
+}
+
+/*
+ * Append an object data segment holding the pixels of @s to @b.
+ *
+ * @b: byte writer receiving the segment
+ * @object_version: version number; the low 4 bits end up in the segment
+ * @page_id: page id written into the segment header
+ * @object_id: object id written into the segment
+ * @s: subpicture whose frame (plane 0, one palette index per pixel) is
+ *   encoded. The RLE variant is chosen from s->nb_colours: 2-bit for up
+ *   to 4 colours, 4-bit for up to 16, 8-bit otherwise.
+ *
+ * The even lines are written first as the top field, followed by the odd
+ * lines as the bottom field. The three length fields are written as
+ * placeholders and filled in once the encoded sizes are known.
+ *
+ * Returns: TRUE on success, FALSE if the segment or one of the field data
+ * blocks does not fit into its 16-bit length field. In that case the
+ * length fields still hold their placeholder zeros, so the contents of
+ * @b are not a valid segment and must be discarded.
+ */
+static gboolean
+dvbenc_write_object_data (GstByteWriter * b, int object_version, int page_id,
+    int object_id, SubpictureRect * s)
+{
+  guint seg_size_pos, end_pos;
+  guint pixel_fields_size_pos, top_start_pos, bottom_start_pos;
+  guint bottom_end_pos;
+  guint seg_size, top_size, bottom_size;
+  EncodeRLEFunc encode_rle_func;
+  const gint stride = GST_VIDEO_INFO_PLANE_STRIDE (&s->frame->info, 0);
+  const gint w = GST_VIDEO_INFO_WIDTH (&s->frame->info);
+  const gint h = GST_VIDEO_INFO_HEIGHT (&s->frame->info);
+  const guint8 *pixels = (guint8 *) (s->frame->data[0]);
+
+  if (s->nb_colours <= 4)
+    encode_rle_func = encode_rle2;
+  else if (s->nb_colours <= 16)
+    encode_rle_func = encode_rle4;
+  else
+    encode_rle_func = encode_rle8;
+
+  gst_byte_writer_put_uint8 (b, DVB_SEGMENT_SYNC_BYTE);
+  gst_byte_writer_put_uint8 (b, DVB_SEGMENT_TYPE_OBJECT_DATA);
+  gst_byte_writer_put_uint16_be (b, page_id);
+  /* Segment length placeholder */
+  seg_size_pos = gst_byte_writer_get_pos (b);
+  gst_byte_writer_put_uint16_be (b, 0);
+  gst_byte_writer_put_uint16_be (b, object_id);
+  /* version number, coding_method (0), non-modifying-flag (0), reserved bit */
+  gst_byte_writer_put_uint8 (b, (object_version << 4) | 0x01);
+
+  /* Top and bottom field length placeholders */
+  pixel_fields_size_pos = gst_byte_writer_get_pos (b);
+  gst_byte_writer_put_uint16_be (b, 0);
+  gst_byte_writer_put_uint16_be (b, 0);
+
+  /* Write the top field (even) lines (round up lines / 2) */
+  top_start_pos = gst_byte_writer_get_pos (b);
+  encode_rle_func (b, pixels, stride * 2, w, (h + 1) / 2);
+
+  /* Write the bottom field (odd) lines (round down lines / 2) */
+  bottom_start_pos = gst_byte_writer_get_pos (b);
+  if (h > 1)
+    encode_rle_func (b, pixels + stride, stride * 2, w, h >> 1);
+
+  /* Remember where the bottom field ends before any stuffing is added:
+   * the stuffing byte belongs to the segment but not to either field
+   * data block, so it must not be counted in the bottom field length */
+  bottom_end_pos = gst_byte_writer_get_pos (b);
+  end_pos = bottom_end_pos;
+
+  /* If the encoded size of the top+bottom field data blocks is even,
+   * add a stuffing byte */
+  if (((bottom_end_pos - top_start_pos) & 1) == 0) {
+    gst_byte_writer_put_uint8 (b, 0);
+    end_pos = gst_byte_writer_get_pos (b);
+  }
+
+  seg_size = end_pos - (seg_size_pos + 2);
+  top_size = bottom_start_pos - top_start_pos;
+  bottom_size = bottom_end_pos - bottom_start_pos;
+
+  /* Check every length before rewriting any of the size fields */
+  if (seg_size > G_MAXUINT16 || top_size > G_MAXUINT16
+      || bottom_size > G_MAXUINT16)
+    return FALSE;               /* Data too big */
+
+  /* Re-write the size fields */
+  gst_byte_writer_set_pos (b, seg_size_pos);
+  gst_byte_writer_put_uint16_be (b, seg_size);
+
+  gst_byte_writer_set_pos (b, pixel_fields_size_pos);
+  gst_byte_writer_put_uint16_be (b, top_size);
+  gst_byte_writer_put_uint16_be (b, bottom_size);
+  gst_byte_writer_set_pos (b, end_pos);
+
+  GST_LOG ("Object seg size %u top_size %u bottom_size %u",
+      seg_size, top_size, bottom_size);
+
+  return TRUE;
+}
+
+/*
+ * Append a CLUT definition segment describing the palette of @s to @b.
+ *
+ * @b: byte writer receiving the segment
+ * @object_version: version number; the low 4 bits end up in the segment
+ * @page_id: page id written into the segment header
+ * @clut_id: id of the CLUT being defined
+ * @s: subpicture; s->nb_colours entries are read from plane 1 of its
+ *   frame, 4 bytes (A, Y, U, V) each
+ */
+static void
+dvbenc_write_clut (GstByteWriter * b, int object_version, int page_id,
+    int clut_id, SubpictureRect * s)
+{
+  guint8 *entry = (guint8 *) (s->frame->data[1]);
+  guint length_pos, end_pos;
+  int depth_flag;
+  int idx;
+
+  /* One flag bit per CLUT depth: 4 = 2-bit, 2 = 4-bit, 1 = 8-bit entries */
+  if (s->nb_colours > 16)
+    depth_flag = 1;
+  else if (s->nb_colours > 4)
+    depth_flag = 2;
+  else
+    depth_flag = 4;
+
+  gst_byte_writer_put_uint8 (b, DVB_SEGMENT_SYNC_BYTE);
+  gst_byte_writer_put_uint8 (b, DVB_SEGMENT_TYPE_CLUT_DEFINITION);
+  gst_byte_writer_put_uint16_be (b, page_id);
+
+  /* Segment length placeholder, filled in at the end */
+  length_pos = gst_byte_writer_get_pos (b);
+  gst_byte_writer_put_uint16_be (b, 0);
+
+  gst_byte_writer_put_uint8 (b, clut_id);
+  /* version number, reserved bits */
+  gst_byte_writer_put_uint8 (b, (object_version << 4) | 0x0F);
+
+  for (idx = 0; idx < s->nb_colours; idx++) {
+    /* CLUT entry id */
+    gst_byte_writer_put_uint8 (b, idx);
+    /* depth flags | 4-bits reserved | full_range_flag = 1 */
+    gst_byte_writer_put_uint8 (b, depth_flag << 5 | 0x1F);
+    /* The palette is stored as AYUV, the segment wants Y, V, U, T where
+     * T (transparency) = 255 - A */
+    gst_byte_writer_put_uint8 (b, entry[1]);    /* Y */
+    gst_byte_writer_put_uint8 (b, entry[3]);    /* V */
+    gst_byte_writer_put_uint8 (b, entry[2]);    /* U */
+    gst_byte_writer_put_uint8 (b, 255 - entry[0]);      /* T */
+
+#if defined (HACK_2BIT)
+    entry += 4 * 64;            /* HACK to generate 4-colour palette */
+#elif defined (HACK_4BIT)
+    entry += 4 * 16;            /* HACK to generate 16-colour palette */
+#else
+    entry += 4;
+#endif
+  }
+
+  /* Go back and fill in the segment length, then return to the end */
+  end_pos = gst_byte_writer_get_pos (b);
+  gst_byte_writer_set_pos (b, length_pos);
+  gst_byte_writer_put_uint16_be (b, end_pos - (length_pos + 2));
+  gst_byte_writer_set_pos (b, end_pos);
+}
+
+/*
+ * Append a region composition segment for @s to @b.
+ *
+ * @b: byte writer receiving the segment
+ * @object_version: version number; the low 4 bits end up in the segment
+ * @page_id: page id written into the segment header
+ * @region_id: id of the region. The same value is also written as the
+ *   CLUT id and as the id of the single object placed in the region.
+ * @s: subpicture; the region takes the width and height of its frame and
+ *   a depth chosen from s->nb_colours
+ */
+static void
+dvbenc_write_region_segment (GstByteWriter * b, int object_version, int page_id,
+    int region_id, SubpictureRect * s)
+{
+  const gint width = GST_VIDEO_INFO_WIDTH (&s->frame->info);
+  const gint height = GST_VIDEO_INFO_HEIGHT (&s->frame->info);
+  guint length_pos, end_pos;
+  int depth_code;
+
+  /* 1 = 2-bit, 2 = 4-bit, 3 = 8-bit pixels */
+  if (s->nb_colours > 16)
+    depth_code = 3;
+  else if (s->nb_colours > 4)
+    depth_code = 2;
+  else
+    depth_code = 1;
+
+  gst_byte_writer_put_uint8 (b, DVB_SEGMENT_SYNC_BYTE);
+  gst_byte_writer_put_uint8 (b, DVB_SEGMENT_TYPE_REGION_COMPOSITION);
+  gst_byte_writer_put_uint16_be (b, page_id);
+
+  /* Segment length placeholder, filled in at the end */
+  length_pos = gst_byte_writer_get_pos (b);
+  gst_byte_writer_put_uint16_be (b, 0);
+
+  gst_byte_writer_put_uint8 (b, region_id);
+  /* version number, fill flag (0), reserved bits */
+  gst_byte_writer_put_uint8 (b, (object_version << 4) | 0x07);
+  gst_byte_writer_put_uint16_be (b, width);
+  gst_byte_writer_put_uint16_be (b, height);
+  /* level_of_compatibility and depth, both set to the same code */
+  gst_byte_writer_put_uint8 (b, (depth_code << 5) | (depth_code << 2) | 0x03);
+  /* CLUT id */
+  gst_byte_writer_put_uint8 (b, region_id);
+  /* Dummy flags for the fill colours */
+  gst_byte_writer_put_uint16_be (b, 0x0003);
+
+  /* Object ID = region_id = CLUT id */
+  gst_byte_writer_put_uint16_be (b, region_id);
+  /* object type = 0, x,y corner = 0 */
+  gst_byte_writer_put_uint16_be (b, 0x0000);
+  gst_byte_writer_put_uint16_be (b, 0xf000);
+
+  /* Go back and fill in the segment length, then return to the end */
+  end_pos = gst_byte_writer_get_pos (b);
+  gst_byte_writer_set_pos (b, length_pos);
+  gst_byte_writer_put_uint16_be (b, end_pos - (length_pos + 2));
+  gst_byte_writer_set_pos (b, end_pos);
+}
+
+/*
+ * Encode a set of subpictures into one buffer of DVB subtitle data.
+ *
+ * @object_version: version number used for the page and for every region,
+ *   CLUT and object written
+ * @page_id: page id written into every segment
+ * @s: array of @num_subpictures subpictures
+ * @num_subpictures: number of entries in @s. Entry i is written as
+ *   region, CLUT and object number i.
+ *
+ * The buffer holds, in order: the 0x20 0x00 prefix, one page composition
+ * segment, one region composition segment per subpicture, one CLUT
+ * definition per subpicture, one object data segment per subpicture, an
+ * end of display set segment and a trailing 0xFF marker.
+ *
+ * Returns: the encoded buffer, or NULL if the object data of one of the
+ * subpictures was too big to encode.
+ */
+GstBuffer *
+gst_dvbenc_encode (int object_version, int page_id, SubpictureRect * s,
+    guint num_subpictures)
+{
+  GstByteWriter b;
+  guint length_pos, end_pos;
+  guint n;
+
+#ifdef HACK_2BIT
+  /* HACK: Only output 4 colours (results may be garbage, but tests
+   * the encoding */
+  s->nb_colours = 4;
+#elif defined (HACK_4BIT)
+  /* HACK: Only output 16 colours */
+  s->nb_colours = 16;
+#endif
+
+  gst_byte_writer_init (&b);
+
+  /* GStreamer passes DVB subpictures as private PES packets with
+   * 0x20 0x00 prefixed */
+  gst_byte_writer_put_uint16_be (&b, 0x2000);
+
+  /* Page Composition Segment */
+  gst_byte_writer_put_uint8 (&b, DVB_SEGMENT_SYNC_BYTE);
+  gst_byte_writer_put_uint8 (&b, DVB_SEGMENT_TYPE_PAGE_COMPOSITION);
+  gst_byte_writer_put_uint16_be (&b, page_id);
+  /* Segment length placeholder, filled in once the regions are listed */
+  length_pos = gst_byte_writer_get_pos (&b);
+  gst_byte_writer_put_uint16_be (&b, 0);
+  gst_byte_writer_put_uint8 (&b, 30);
+
+  /* We always write complete overlay subregions, so use page_state = 2 (mode change) */
+  gst_byte_writer_put_uint8 (&b, (object_version << 4) | (2 << 2) | 0x3);
+
+  /* One entry per region: id, a 0xFF byte, then its x and y position */
+  for (n = 0; n < num_subpictures; n++) {
+    const SubpictureRect *rect = &s[n];
+
+    gst_byte_writer_put_uint8 (&b, n);
+    gst_byte_writer_put_uint8 (&b, 0xFF);
+    gst_byte_writer_put_uint16_be (&b, rect->x);
+    gst_byte_writer_put_uint16_be (&b, rect->y);
+  }
+
+  /* Rewrite the size field */
+  end_pos = gst_byte_writer_get_pos (&b);
+  gst_byte_writer_set_pos (&b, length_pos);
+  gst_byte_writer_put_uint16_be (&b, end_pos - (length_pos + 2));
+  gst_byte_writer_set_pos (&b, end_pos);
+
+  /* Region Composition */
+  for (n = 0; n < num_subpictures; n++)
+    dvbenc_write_region_segment (&b, object_version, page_id, n, &s[n]);
+
+  /* CLUT definitions */
+  for (n = 0; n < num_subpictures; n++)
+    dvbenc_write_clut (&b, object_version, page_id, n, &s[n]);
+
+  /* object data */
+  for (n = 0; n < num_subpictures; n++) {
+    /* FIXME: Any object data could potentially overflow the 64K field
+     * size, in which case we should split it */
+    if (!dvbenc_write_object_data (&b, object_version, page_id, n, &s[n])) {
+      GST_WARNING ("Object data was too big to encode");
+      /* Throw away everything written so far */
+      gst_byte_writer_reset (&b);
+      return NULL;
+    }
+  }
+
+  /* End of Display Set segment */
+  gst_byte_writer_put_uint8 (&b, DVB_SEGMENT_SYNC_BYTE);
+  gst_byte_writer_put_uint8 (&b, DVB_SEGMENT_TYPE_END_OF_DISPLAY);
+  gst_byte_writer_put_uint16_be (&b, page_id);
+  gst_byte_writer_put_uint16_be (&b, 0);
+
+  /* End of PES data marker */
+  gst_byte_writer_put_uint8 (&b, 0xFF);
+
+  return gst_byte_writer_reset_and_get_buffer (&b);
+}
diff --git a/gst/dvbsubenc/gstdvbsubenc.c b/gst/dvbsubenc/gstdvbsubenc.c
new file mode 100644
index 000000000..bde095434
--- /dev/null
+++ b/gst/dvbsubenc/gstdvbsubenc.c
@@ -0,0 +1,609 @@
+/* GStreamer
+ * Copyright (C) <2020> Jan Schmidt <jan@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstdvbsubenc.h"
+#include <string.h>
+
+/**
+ * SECTION:element-dvbsubenc
+ * @title: dvbsubenc
+ * @see_also: dvbsuboverlay
+ *
+ * This element encodes AYUV video frames to DVB subpictures.
+ *
+ * ## Example pipelines
+ * |[
+ * gst-launch-1.0 videotestsrc num-buffers=900 ! video/x-raw,width=720,height=576,framerate=30/1 ! x264enc bitrate=500 ! h264parse ! mpegtsmux name=mux ! filesink location=test.ts  filesrc location=test-subtitles.srt ! subparse ! textrender ! dvbsubenc ! mux.
+ * ]|
+ * Encode a test video signal and an SRT subtitle file to MPEG-TS with a DVB subpicture track
+ *
+ */
+
+/* Default values for the "max-colours" and "ts-offset" properties */
+#define DEFAULT_MAX_COLOURS 16
+#define DEFAULT_TS_OFFSET 0
+
+/* GObject property IDs; PROP_0 is the unused placeholder for ID 0 */
+enum
+{
+  PROP_0,
+  PROP_MAX_COLOURS,
+  PROP_TS_OFFSET
+};
+
+/* G_DEFINE_TYPE generates gst_dvb_sub_enc_parent_class; alias it to the
+ * shorter 'parent_class' name used when chaining up */
+#define gst_dvb_sub_enc_parent_class parent_class
+G_DEFINE_TYPE (GstDvbSubEnc, gst_dvb_sub_enc, GST_TYPE_ELEMENT);
+
+/* GObject property accessors */
+static void gst_dvb_sub_enc_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec);
+static void gst_dvb_sub_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec);
+
+/* Pad functions for the src pad (events) and sink pad (data flow) */
+static gboolean gst_dvb_sub_enc_src_event (GstPad * srcpad, GstObject * parent,
+    GstEvent * event);
+static GstFlowReturn gst_dvb_sub_enc_chain (GstPad * pad, GstObject * parent,
+    GstBuffer * buf);
+
+/* Object teardown and sink pad event / caps handling */
+static void gst_dvb_sub_enc_finalize (GObject * gobject);
+static gboolean gst_dvb_sub_enc_sink_event (GstPad * pad, GstObject * parent,
+    GstEvent * event);
+static gboolean gst_dvb_sub_enc_sink_setcaps (GstPad * pad, GstCaps * caps);
+
+/* Input: raw video frames, AYUV format only */
+static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
+    GST_PAD_SINK,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("video/x-raw, format = (string) { AYUV }")
+    );
+
+/* Output: DVB subpicture packets */
+static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
+    GST_PAD_SRC,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("subpicture/x-dvb")
+    );
+
+/* Debug category definition; initialised in plugin_init() */
+GST_DEBUG_CATEGORY (gst_dvb_sub_enc_debug);
+
+/* Class initialiser: installs the GObject vfuncs, registers the pad
+ * templates and element metadata, and declares the element properties. */
+static void
+gst_dvb_sub_enc_class_init (GstDvbSubEncClass * klass)
+{
+  GObjectClass *gobject_class = (GObjectClass *) klass;
+  GstElementClass *gstelement_class = (GstElementClass *) klass;
+
+  gobject_class->finalize = gst_dvb_sub_enc_finalize;
+  gobject_class->set_property = gst_dvb_sub_enc_set_property;
+  gobject_class->get_property = gst_dvb_sub_enc_get_property;
+
+  gst_element_class_add_static_pad_template (gstelement_class, &sink_template);
+  gst_element_class_add_static_pad_template (gstelement_class, &src_template);
+
+  /* This element produces an encoded stream, so it must be classified as an
+   * encoder: the klass string is what factory type filters match against */
+  gst_element_class_set_static_metadata (gstelement_class,
+      "DVB subtitle encoder", "Codec/Encoder/Video",
+      "Encodes AYUV video frames streams into DVB subtitles",
+      "Jan Schmidt <jan@centricular.com>");
+
+ /**
+  * GstDvbSubEnc:max-colours
+  *
+  * Set the maximum number of colours to output into the DVB subpictures.
+  * Good choices are 4, 16 or 256 - as they correspond to the 2-bit, 4-bit
+  * and 8-bit palette modes that the DVB subpicture encoding supports.
+  */
+  g_object_class_install_property (gobject_class, PROP_MAX_COLOURS,
+      g_param_spec_int ("max-colours", "Maximum Colours",
+          "Maximum Number of Colours to output", 1, 256, DEFAULT_MAX_COLOURS,
+          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+
+ /**
+  * GstDvbSubEnc:ts-offset
+  *
+  * Advance or delay the output subpicture time-line. This is a
+  * convenience property for setting the src pad offset.
+  */
+  g_object_class_install_property (gobject_class, PROP_TS_OFFSET,
+      g_param_spec_int64 ("ts-offset", "Subtitle Timestamp Offset",
+          "Apply an offset to incoming timestamps before output (in nanoseconds)",
+          G_MININT64, G_MAXINT64, DEFAULT_TS_OFFSET,
+          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+}
+
+/* Instance initialiser: resets properties and state to their defaults, then
+ * creates the sink and src pads and attaches their handlers. */
+static void
+gst_dvb_sub_enc_init (GstDvbSubEnc * enc)
+{
+  GstElement *element = GST_ELEMENT (enc);
+
+  /* Property defaults; no subtitle end packet is pending yet */
+  enc->max_colours = DEFAULT_MAX_COLOURS;
+  enc->ts_offset = DEFAULT_TS_OFFSET;
+  enc->current_end_time = GST_CLOCK_TIME_NONE;
+
+  /* Sink pad: receives the AYUV frames and sink-side events */
+  enc->sinkpad = gst_pad_new_from_static_template (&sink_template, "sink");
+  gst_pad_set_chain_function (enc->sinkpad,
+      GST_DEBUG_FUNCPTR (gst_dvb_sub_enc_chain));
+  gst_pad_set_event_function (enc->sinkpad,
+      GST_DEBUG_FUNCPTR (gst_dvb_sub_enc_sink_event));
+  gst_element_add_pad (element, enc->sinkpad);
+
+  /* Src pad: outputs the encoded packets, with fixed caps */
+  enc->srcpad = gst_pad_new_from_static_template (&src_template, "src");
+  gst_pad_set_event_function (enc->srcpad,
+      GST_DEBUG_FUNCPTR (gst_dvb_sub_enc_src_event));
+  gst_pad_use_fixed_caps (enc->srcpad);
+  gst_element_add_pad (element, enc->srcpad);
+}
+
+/* GObject finalize handler. Nothing is released here directly; the call is
+ * simply chained up to the parent class implementation. */
+static void
+gst_dvb_sub_enc_finalize (GObject * gobject)
+{
+  /* No instance-specific cleanup is performed in this function */
+
+  G_OBJECT_CLASS (parent_class)->finalize (gobject);
+}
+
+/* GObject property getter: copies the current value of the requested
+ * property into @value, or warns if @prop_id is not one of ours. */
+static void
+gst_dvb_sub_enc_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec)
+{
+  GstDvbSubEnc *self = GST_DVB_SUB_ENC (object);
+
+  if (prop_id == PROP_MAX_COLOURS) {
+    g_value_set_int (value, self->max_colours);
+  } else if (prop_id == PROP_TS_OFFSET) {
+    g_value_set_int64 (value, self->ts_offset);
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+  }
+}
+
+/* GObject property setter: stores the new value of the requested property,
+ * or warns if @prop_id is not one of ours. */
+static void
+gst_dvb_sub_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec)
+{
+  GstDvbSubEnc *self = GST_DVB_SUB_ENC (object);
+
+  if (prop_id == PROP_MAX_COLOURS) {
+    self->max_colours = g_value_get_int (value);
+  } else if (prop_id == PROP_TS_OFFSET) {
+    /* The offset is applied by the src pad itself, so forward it there */
+    self->ts_offset = g_value_get_int64 (value);
+    gst_pad_set_offset (self->srcpad, self->ts_offset);
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+  }
+}
+
+/* Src pad event handler. No event gets special treatment here; everything
+ * is passed to the default pad event handler and its result returned. */
+static gboolean
+gst_dvb_sub_enc_src_event (GstPad * pad, GstObject * parent, GstEvent * event)
+{
+  return gst_pad_event_default (pad, parent, event);
+}
+
+/* Compute the bounding box of all non-transparent pixels of an image whose
+ * first byte per pixel is the alpha value (AYUV).
+ *
+ * @pixels: start of the first row
+ * @stride: distance in bytes between the start of consecutive rows
+ * @pixel_stride: distance in bytes between consecutive pixels in a row
+ * @width, @height: image dimensions in pixels
+ * @out_left, @out_right, @out_top, @out_bottom: receive the inclusive
+ *   coordinates of the bounding box
+ *
+ * If no pixel has a non-zero alpha the outputs are left = width, right = 0,
+ * top = height and bottom = 0, i.e. left > right and top > bottom. Callers
+ * must check for that before computing a size from the result. */
+static void
+find_largest_subregion (guint8 * pixels, guint stride, guint pixel_stride,
+    gint width, gint height, guint * out_left, guint * out_right,
+    guint * out_top, guint * out_bottom)
+{
+  guint left = width, right = 0, top = height, bottom = 0;
+  gint y;
+  guint8 *row = pixels;
+
+  for (y = 0; y < height; y++, row += stride) {
+    gint first, last;
+
+    /* Find the first visible pixel of this row, scanning from the left */
+    for (first = 0; first < width; first++) {
+      if (row[first * pixel_stride] != 0)
+        break;
+    }
+    if (first == width)
+      continue;                 /* Fully transparent row */
+
+    /* Find the last visible pixel, scanning from the right. Every pixel of
+     * the row takes part in both extents, so content that sits entirely in
+     * one half of the row (or on the middle pixel) is still measured
+     * correctly. The scan stops at 'first', which is known to be visible. */
+    for (last = width - 1; last > first; last--) {
+      if (row[last * pixel_stride] != 0)
+        break;
+    }
+
+    left = MIN (left, (guint) first);
+    right = MAX (right, (guint) last);
+    top = MIN (top, (guint) y);
+    bottom = MAX (bottom, (guint) y);
+  }
+
+  *out_left = left;
+  *out_right = right;
+  *out_top = top;
+  *out_bottom = bottom;
+}
+
+/* Create and map a new buffer containing the indicated subregion of the input
+ * image, returning the result in the 'out' GstVideoFrame.
+ *
+ * @in must be a mapped AYUV frame; @x, @y, @width and @height describe the
+ * region to copy, in pixels. On success TRUE is returned and @out is mapped
+ * for reading and owns the only reference to the new buffer, which is
+ * released when @out is unmapped. On failure FALSE is returned and @out is
+ * not mapped. */
+static gboolean
+create_cropped_frame (GstDvbSubEnc * enc, GstVideoFrame * in,
+    GstVideoFrame * out, guint x, guint y, guint width, guint height)
+{
+  GstBuffer *cropped_buffer;
+  GstVideoInfo cropped_info;
+  guint8 *out_pixels, *in_pixels;
+  guint out_stride, in_stride, p_stride;
+  guint row;
+
+  g_return_val_if_fail (GST_VIDEO_INFO_FORMAT (&in->info) ==
+      GST_VIDEO_FORMAT_AYUV, FALSE);
+
+  /* Rejects dimensions whose frame size cannot be represented */
+  if (!gst_video_info_set_format (&cropped_info,
+          GST_VIDEO_INFO_FORMAT (&in->info), width, height))
+    return FALSE;
+
+  /* gst_buffer_new_allocate() returns NULL if the memory can't be had */
+  cropped_buffer =
+      gst_buffer_new_allocate (NULL, GST_VIDEO_INFO_SIZE (&cropped_info), NULL);
+  if (cropped_buffer == NULL)
+    return FALSE;
+
+  if (!gst_video_frame_map (out, &cropped_info, cropped_buffer, GST_MAP_WRITE)) {
+    gst_buffer_unref (cropped_buffer);
+    return FALSE;
+  }
+
+  p_stride = GST_VIDEO_FRAME_COMP_PSTRIDE (in, 0);
+  in_stride = GST_VIDEO_FRAME_PLANE_STRIDE (in, 0);
+  in_pixels = GST_VIDEO_FRAME_PLANE_DATA (in, 0);
+
+  out_stride = GST_VIDEO_FRAME_PLANE_STRIDE (out, 0);
+  out_pixels = GST_VIDEO_FRAME_PLANE_DATA (out, 0);
+
+  /* Start at the top-left pixel of the region and copy it row by row */
+  in_pixels += y * in_stride + x * p_stride;
+
+  for (row = 0; row < height; row++) {
+    memcpy (out_pixels, in_pixels, width * p_stride);
+
+    in_pixels += in_stride;
+    out_pixels += out_stride;
+  }
+
+  /* Re-map the frame for reading with the NO_REF flag: the frame then takes
+   * over our reference to the buffer, and the buffer is released when the
+   * frame is unmapped (if the map call succeeds) */
+  gst_video_frame_unmap (out);
+  if (!gst_video_frame_map (out, &cropped_info, cropped_buffer,
+          GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)) {
+    gst_buffer_unref (cropped_buffer);
+    return FALSE;
+  }
+  return TRUE;
+}
+
+/* Encode the visible part of one input frame as a single DVB subpicture.
+ *
+ * Finds the bounding box of the non-transparent pixels of @vframe, crops the
+ * frame to it, reduces the crop to a paletted 8-bit image with at most
+ * enc->max_colours colours, encodes that as one packet and pushes it on the
+ * src pad. If the input buffer has both a valid PTS and a valid duration,
+ * the resulting end time is stored in enc->current_end_time.
+ *
+ * Returns the result of the pad push; GST_FLOW_OK if the frame was skipped
+ * (nothing visible, or palette conversion failed); GST_FLOW_ERROR if a
+ * buffer could not be allocated or mapped, or encoding failed. */
+static GstFlowReturn
+process_largest_subregion (GstDvbSubEnc * enc, GstVideoFrame * vframe)
+{
+  GstFlowReturn ret = GST_FLOW_ERROR;
+
+  guint8 *pixels = GST_VIDEO_FRAME_PLANE_DATA (vframe, 0);
+  guint stride = GST_VIDEO_FRAME_PLANE_STRIDE (vframe, 0);
+  guint pixel_stride = GST_VIDEO_FRAME_COMP_PSTRIDE (vframe, 0);
+  guint left, right, top, bottom;
+  guint region_width, region_height;
+  GstBuffer *ayuv8p_buffer;
+  GstVideoInfo ayuv8p_info;
+  GstVideoFrame cropped_frame, ayuv8p_frame;
+  guint32 num_colours;
+  GstClockTime end_ts = GST_CLOCK_TIME_NONE, duration;
+
+  find_largest_subregion (pixels, stride, pixel_stride, enc->in_info.width,
+      enc->in_info.height, &left, &right, &top, &bottom);
+
+  /* The search starts from left = width / right = 0 and top = height /
+   * bottom = 0, so an inverted box means no usable region was found. The
+   * size computation below would wrap around in that case, so skip the
+   * frame instead */
+  if (left > right || top > bottom) {
+    GST_LOG_OBJECT (enc, "No visible region in input frame, skipping");
+    goto skip;
+  }
+
+  region_width = right - left + 1;
+  region_height = bottom - top + 1;
+
+  GST_LOG_OBJECT (enc, "Found subregion %u,%u -> %u,%u w %u, %u", left, top,
+      right, bottom, region_width, region_height);
+
+  if (!create_cropped_frame (enc, vframe, &cropped_frame, left, top,
+          region_width, region_height)) {
+    GST_WARNING_OBJECT (enc, "Failed to map frame conversion input buffer");
+    goto fail;
+  }
+
+  /* FIXME: RGB8P is the same size as what we're building, so this is fine,
+   * but it'd be better if we had an explicit paletted format for YUV8P */
+  gst_video_info_set_format (&ayuv8p_info, GST_VIDEO_FORMAT_RGB8P,
+      region_width, region_height);
+  ayuv8p_buffer =
+      gst_buffer_new_allocate (NULL, GST_VIDEO_INFO_SIZE (&ayuv8p_info), NULL);
+  if (ayuv8p_buffer == NULL) {
+    GST_WARNING_OBJECT (enc,
+        "Failed to allocate frame conversion output buffer");
+    gst_video_frame_unmap (&cropped_frame);
+    goto fail;
+  }
+
+  /* Mapped without extra ref - the frame now owns the only ref */
+  if (!gst_video_frame_map (&ayuv8p_frame, &ayuv8p_info, ayuv8p_buffer,
+          GST_MAP_WRITE | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)) {
+    GST_WARNING_OBJECT (enc, "Failed to map frame conversion output buffer");
+    gst_video_frame_unmap (&cropped_frame);
+    gst_buffer_unref (ayuv8p_buffer);
+    goto fail;
+  }
+
+  if (!gst_dvbsubenc_ayuv_to_ayuv8p (&cropped_frame, &ayuv8p_frame,
+          enc->max_colours, &num_colours)) {
+    GST_ERROR_OBJECT (enc,
+        "Failed to convert subpicture region to paletted 8-bit");
+    gst_video_frame_unmap (&cropped_frame);
+    gst_video_frame_unmap (&ayuv8p_frame);
+    goto skip;
+  }
+
+  /* The full-colour crop is no longer needed once it has been paletted */
+  gst_video_frame_unmap (&cropped_frame);
+
+  duration = GST_BUFFER_DURATION (vframe->buffer);
+
+  /* end_ts only becomes valid when both PTS and duration are valid */
+  if (GST_CLOCK_TIME_IS_VALID (duration)) {
+    end_ts = GST_BUFFER_PTS (vframe->buffer);
+    if (GST_CLOCK_TIME_IS_VALID (end_ts)) {
+      end_ts += duration;
+    }
+  }
+
+  /* Encode output buffer and push it */
+  {
+    SubpictureRect s;
+    GstBuffer *packet;
+
+    s.frame = &ayuv8p_frame;
+    s.nb_colours = num_colours;
+    s.x = left;
+    s.y = top;
+
+    packet = gst_dvbenc_encode (enc->object_version & 0xF, 1, &s, 1);
+    if (packet == NULL) {
+      gst_video_frame_unmap (&ayuv8p_frame);
+      goto fail;
+    }
+
+    enc->object_version++;
+
+    /* Carry timestamps, flags and metas of the input over to the packet */
+    gst_buffer_copy_into (packet, vframe->buffer, GST_BUFFER_COPY_METADATA, 0,
+        -1);
+
+    if (!GST_BUFFER_DTS_IS_VALID (packet))
+      GST_BUFFER_DTS (packet) = GST_BUFFER_PTS (packet);
+
+    ret = gst_pad_push (enc->srcpad, packet);
+  }
+
+  if (GST_CLOCK_TIME_IS_VALID (end_ts)) {
+    GST_LOG_OBJECT (enc, "Scheduling subtitle end packet for %" GST_TIME_FORMAT,
+        GST_TIME_ARGS (end_ts));
+    enc->current_end_time = end_ts;
+  }
+
+  gst_video_frame_unmap (&ayuv8p_frame);
+
+  return ret;
+skip:
+  return GST_FLOW_OK;
+fail:
+  return GST_FLOW_ERROR;
+}
+
+/* Push an "end of subtitle" packet (a display set with no regions) if a
+ * subtitle end time is pending and @pts has moved past it.
+ *
+ * Returns GST_FLOW_OK when there is nothing to do, GST_FLOW_ERROR (after
+ * posting an element error) when the packet can't be encoded, and the result
+ * of the pad push otherwise. */
+static GstFlowReturn
+gst_dvb_sub_enc_generate_end_packet (GstDvbSubEnc * enc, GstClockTime pts)
+{
+  GstClockTime end_time = enc->current_end_time;
+  GstBuffer *end_packet;
+
+  /* Nothing pending, or the current subtitle has not ended yet */
+  if (!GST_CLOCK_TIME_IS_VALID (end_time) || end_time >= pts)
+    return GST_FLOW_OK;
+
+  GST_DEBUG_OBJECT (enc, "Outputting end of page at TS %" GST_TIME_FORMAT,
+      GST_TIME_ARGS (end_time));
+
+  end_packet = gst_dvbenc_encode (enc->object_version & 0xF, 1, NULL, 0);
+  if (end_packet == NULL) {
+    GST_ELEMENT_ERROR (enc, STREAM, FAILED,
+        ("Internal data stream error."),
+        ("Failed to encode end of subtitle packet"));
+    return GST_FLOW_ERROR;
+  }
+
+  enc->object_version++;
+
+  /* Stamp the packet with the end time and clear the pending state */
+  GST_BUFFER_PTS (end_packet) = end_time;
+  GST_BUFFER_DTS (end_packet) = end_time;
+  enc->current_end_time = GST_CLOCK_TIME_NONE;
+
+  return gst_pad_push (enc->srcpad, end_packet);
+}
+
+/* Sink pad chain function. Takes ownership of @buf.
+ *
+ * First emits a pending end-of-subtitle packet if the PTS of @buf is past
+ * the end of the current subtitle, then maps the buffer as a video frame and
+ * encodes its visible region. Returns the resulting flow return, or
+ * GST_FLOW_ERROR if the input buffer cannot be mapped. */
+static GstFlowReturn
+gst_dvb_sub_enc_chain (GstPad * pad, GstObject * parent, GstBuffer * buf)
+{
+  GstFlowReturn ret = GST_FLOW_OK;
+  GstDvbSubEnc *enc = GST_DVB_SUB_ENC (parent);
+  GstVideoFrame vframe;
+  GstClockTime pts = GST_BUFFER_PTS (buf);
+
+  /* The duration is an unsigned GstClockTime, so print it with the time
+   * format rather than as a signed 64-bit integer */
+  GST_DEBUG_OBJECT (enc, "Have buffer of size %" G_GSIZE_FORMAT ", ts %"
+      GST_TIME_FORMAT ", dur %" GST_TIME_FORMAT, gst_buffer_get_size (buf),
+      GST_TIME_ARGS (GST_BUFFER_TIMESTAMP (buf)),
+      GST_TIME_ARGS (GST_BUFFER_DURATION (buf)));
+
+  if (GST_CLOCK_TIME_IS_VALID (pts)) {
+    ret = gst_dvb_sub_enc_generate_end_packet (enc, pts);
+    if (ret != GST_FLOW_OK)
+      goto fail;
+  }
+
+  /* FIXME: Allow GstVideoOverlayComposition input, so we can directly encode the
+   * overlays passed */
+
+  /* Scan the input buffer for regions to encode */
+  /* FIXME: Could use the blob extents tracking code from OpenHMD here to collect
+   * multiple regions*/
+  if (!gst_video_frame_map (&vframe, &enc->in_info, buf, GST_MAP_READ)) {
+    GST_ERROR_OBJECT (enc, "Failed to map input buffer for reading");
+    ret = GST_FLOW_ERROR;
+    goto fail;
+  }
+
+  ret = process_largest_subregion (enc, &vframe);
+  gst_video_frame_unmap (&vframe);
+
+fail:
+  /* Reached on success too: the input buffer is always released here */
+  gst_buffer_unref (buf);
+  return ret;
+}
+
+/* Handle new caps on the sink pad.
+ *
+ * Parses @caps into enc->in_info and sets matching subpicture/x-dvb caps
+ * (same width, height and framerate) on the src pad. Returns TRUE on
+ * success, FALSE if the caps can't be parsed or can't be set downstream. */
+static gboolean
+gst_dvb_sub_enc_sink_setcaps (GstPad * pad, GstCaps * caps)
+{
+  /* gst_pad_get_parent() returns a new reference, which must be dropped on
+   * every exit path - hence the single exit point at 'beach' */
+  GstDvbSubEnc *enc = GST_DVB_SUB_ENC (gst_pad_get_parent (pad));
+  gboolean ret = FALSE;
+  GstCaps *out_caps = NULL;
+
+  GST_DEBUG_OBJECT (enc, "setcaps called with %" GST_PTR_FORMAT, caps);
+  if (!gst_video_info_from_caps (&enc->in_info, caps)) {
+    GST_ERROR_OBJECT (enc, "Failed to parse input caps");
+    goto beach;
+  }
+
+  out_caps = gst_caps_new_simple ("subpicture/x-dvb",
+      "width", G_TYPE_INT, enc->in_info.width,
+      "height", G_TYPE_INT, enc->in_info.height,
+      "framerate", GST_TYPE_FRACTION, enc->in_info.fps_n, enc->in_info.fps_d,
+      NULL);
+
+  if (!gst_pad_set_caps (enc->srcpad, out_caps)) {
+    GST_WARNING_OBJECT (enc, "failed setting downstream caps");
+    goto beach;
+  }
+
+  ret = TRUE;
+
+beach:
+  if (out_caps != NULL)
+    gst_caps_unref (out_caps);
+  gst_object_unref (enc);
+  return ret;
+}
+
+/* Sink pad event handler. Takes ownership of @event.
+ *
+ * - CAPS: configures the element via gst_dvb_sub_enc_sink_setcaps(); the
+ *   event is consumed and the result of the caps setup returned.
+ * - GAP: treated as time advancing to the end of the gap, which may trigger
+ *   the pending end-of-subtitle packet; the event is consumed.
+ * - FLUSH_STOP: drops any pending subtitle end time, then forwards.
+ * - Everything else (including SEGMENT) gets default handling. */
+static gboolean
+gst_dvb_sub_enc_sink_event (GstPad * pad, GstObject * parent, GstEvent * event)
+{
+  GstDvbSubEnc *enc = GST_DVB_SUB_ENC (parent);
+  gboolean ret = FALSE;
+
+  GST_LOG_OBJECT (enc, "%s event", GST_EVENT_TYPE_NAME (event));
+
+  switch (GST_EVENT_TYPE (event)) {
+    case GST_EVENT_CAPS:
+    {
+      GstCaps *caps;
+
+      gst_event_parse_caps (event, &caps);
+      ret = gst_dvb_sub_enc_sink_setcaps (pad, caps);
+      gst_event_unref (event);
+      break;
+    }
+    case GST_EVENT_GAP:
+    {
+      GstClockTime start, duration;
+
+      gst_event_parse_gap (event, &start, &duration);
+      if (GST_CLOCK_TIME_IS_VALID (start)) {
+        GstFlowReturn flow;
+
+        if (GST_CLOCK_TIME_IS_VALID (duration))
+          start += duration;
+        /* we do not expect another buffer until after gap,
+         * so that is our position now */
+        GST_DEBUG_OBJECT (enc,
+            "Got GAP event, advancing time to %" GST_TIME_FORMAT,
+            GST_TIME_ARGS (start));
+
+        /* An event handler can't return a flow error, so a failed push is
+         * only logged here */
+        flow = gst_dvb_sub_enc_generate_end_packet (enc, start);
+        if (flow != GST_FLOW_OK) {
+          GST_DEBUG_OBJECT (enc, "Pushing end packet on GAP returned %s",
+              gst_flow_get_name (flow));
+        }
+      } else {
+        GST_WARNING_OBJECT (enc, "Got GAP event with invalid position");
+      }
+
+      gst_event_unref (event);
+      ret = TRUE;
+      break;
+    }
+    case GST_EVENT_FLUSH_STOP:{
+      enc->current_end_time = GST_CLOCK_TIME_NONE;
+
+      ret = gst_pad_event_default (pad, parent, event);
+      break;
+    }
+    default:{
+      ret = gst_pad_event_default (pad, parent, event);
+      break;
+    }
+  }
+  return ret;
+}
+
+/* Plugin entry point: registers the dvbsubenc element and, if that worked,
+ * initialises the debug category. Returns FALSE if registration failed. */
+static gboolean
+plugin_init (GstPlugin * plugin)
+{
+  gboolean registered = gst_element_register (plugin, "dvbsubenc",
+      GST_RANK_NONE, GST_TYPE_DVB_SUB_ENC);
+
+  if (registered) {
+    GST_DEBUG_CATEGORY_INIT (gst_dvb_sub_enc_debug, "dvbsubenc", 0,
+        "DVB subtitle encoder");
+  }
+
+  return registered;
+}
+
+/* Plugin descriptor for the "dvbsubenc" plugin, naming plugin_init() as its
+ * initialisation function. VERSION, GST_LICENSE, GST_PACKAGE_NAME and
+ * GST_PACKAGE_ORIGIN are not defined in this file - presumably they come
+ * from the build configuration (config.h). */
+GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
+    GST_VERSION_MINOR,
+    dvbsubenc,
+    "DVB subtitle parser and encoder", plugin_init,
+    VERSION, GST_LICENSE, GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN);
diff --git a/gst/dvbsubenc/gstdvbsubenc.h b/gst/dvbsubenc/gstdvbsubenc.h
new file mode 100644
index 000000000..76d8f5fd6
--- /dev/null
+++ b/gst/dvbsubenc/gstdvbsubenc.h
@@ -0,0 +1,70 @@
+/* GStreamer
+ * Copyright (C) <2020> Jan Schmidt <jan@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_DVB_SUB_ENC_H__
+#define __GST_DVB_SUB_ENC_H__
+
+#include <gst/gst.h>
+#include <gst/video/video.h>
+
+/* Standard GObject type, cast and type-check macros for the element */
+#define GST_TYPE_DVB_SUB_ENC             (gst_dvb_sub_enc_get_type())
+#define GST_DVB_SUB_ENC(obj)             (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_DVB_SUB_ENC,GstDvbSubEnc))
+#define GST_DVB_SUB_ENC_CLASS(klass)     (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_DVB_SUB_ENC,GstDvbSubEncClass))
+#define GST_IS_DVB_SUB_ENC(obj)          (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_DVB_SUB_ENC))
+#define GST_IS_DVB_SUB_ENC_CLASS(klass)  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_DVB_SUB_ENC))
+
+/* Debug category used by every source file that includes this header */
+GST_DEBUG_CATEGORY_EXTERN (gst_dvb_sub_enc_debug);
+#define GST_CAT_DEFAULT (gst_dvb_sub_enc_debug)
+
+typedef struct _GstDvbSubEnc GstDvbSubEnc;
+typedef struct _GstDvbSubEncClass GstDvbSubEncClass;
+typedef struct SubpictureRect SubpictureRect;
+
+/* One subpicture region handed to gst_dvbenc_encode() */
+struct SubpictureRect {
+  /* Paletted 8-bit picture */
+  GstVideoFrame *frame;
+  /* Actual number of colours used from the palette */
+  guint32 nb_colours;
+
+  /* Position of the region's top-left corner */
+  guint x, y;
+};
+
+/* Instance structure of the dvbsubenc element */
+struct _GstDvbSubEnc
+{
+  GstElement element;
+
+  /* Video info parsed from the current sink caps */
+  GstVideoInfo in_info;
+  GstPad *sinkpad;
+  GstPad *srcpad;
+
+  /* Incremented for every encoded packet; the low 4 bits are passed to the
+   * encoder */
+  int object_version;
+
+  /* Values of the "max-colours" and "ts-offset" properties */
+  int max_colours;
+  GstClockTimeDiff ts_offset;
+
+  /* Time at which the current subtitle ends and an end packet is due, or
+   * GST_CLOCK_TIME_NONE if none is pending */
+  GstClockTime current_end_time;
+};
+
+struct _GstDvbSubEncClass
+{
+  GstElementClass parent_class;
+};
+
+GType gst_dvb_sub_enc_get_type (void);
+
+/* Convert the AYUV frame @src into the paletted 8-bit frame @dest using at
+ * most @max_colours colours. Returns FALSE on failure; on success the number
+ * of colours is stored in @out_num_colours. */
+gboolean gst_dvbsubenc_ayuv_to_ayuv8p (GstVideoFrame * src, GstVideoFrame * dest, int max_colours, guint32 *out_num_colours);
+
+/* Encode @num_subpictures regions from @s into one buffer of DVB subtitle
+ * data. Returns NULL on failure. */
+GstBuffer *gst_dvbenc_encode (int object_version, int page_id, SubpictureRect *s, guint num_subpictures);
+
+#endif /* __GST_DVB_SUB_ENC_H__ */
diff --git a/gst/dvbsubenc/libimagequant/CHANGELOG b/gst/dvbsubenc/libimagequant/CHANGELOG
new file mode 100644
index 000000000..e096348c9
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/CHANGELOG
@@ -0,0 +1,125 @@
+version 2.4
+-----------
+ - fixed remapping of bright colors when dithering
+ - added libimagequant API to add fixed preset colors to the palette
+
+version 2.3
+-----------
+ - added ./configure script for better support of Intel C compiler and dependencies [thanks to pdknsk]
+ - tweaked quality metric to better estimate quality of images with large solid backgrounds [thanks to Rolf Timmermans]
+ - atomic file saves and fixed --skip-if-larger
+ - avoid applying quality setting to images that use palette already
+ - preserving standard PNG chunks (requires libpng 1.6)
+ - deprecated libpng 1.2 support
+
+version 2.2
+-----------
+ - preserving of unknown PNG chunks (enables optimized Android 9-patch images)
+ - improved color profile support: cHRM & gAMA as alternative to ICC profiles, OpenMP acceleration
+ - improved support for Intel C Compiler, speedup in 32-bit GCC, and some workarounds for Visual Studio's incomplete C support
+
+version 2.1
+-----------
+ - option to save files only if they're compressed better than the original
+ - option to generate posterized output (for use with 16-bit textures)
+ - support for ICC profiles via Little CMS library
+
+version 2.0
+-----------
+ - refactored codebase into pngquant and standalone libimagequant
+ - reduced memory usage by further 30% (and more for very large images)
+ - less precise remapping improving speed by 25% in higher speed settings
+ - --output option for writing converted file under the given path
+ - light dithering with --floyd=0.5
+ - fixed regression in dithering of alpha channel
+
+version 1.8
+-----------
+ - min/max quality option (number of colors is automatically adjusted for desired quality level)
+ - switched option parsing to getopt_long (syntax such as -s1 and --ext=ext is supported)
+ - significantly improved performance thanks to custom partial sorting
+ - optional Cocoa (Mac OS X) image reader for color profile support
+ - reduced memory usage by 20%
+ - remapping improved for very low number of colors
+
+version 1.7
+-----------
+ - new, accurate RGBA color similarity algorithm
+ - change of optional SSE3 code to SSE2 that is always enabled on x86-64
+ - optional OpenMP-based parallelisation of remapping
+ - changed long options to use double hyphen (-force to --force) [thanks to Jari Aalto]
+
+version 1.6
+-----------
+ - novel dithering algorithm that doesn't add noise unless necessary
+ - perceptual weighting of colors taking into account edges and noise
+ - much faster remapping
+ - improved portability, makefiles and man page
+
+version 1.5
+-----------
+ - palettes postprocessed with Voronoi iteration
+ - better RGBA color similarity algorithm and Floyd-Steinberg remapping
+ - SSE optimisations
+
+version 1.4
+-----------
+ - median cut is applied many times in a feedback loop
+ - speed/quality trade-off option
+ - faster remap of transparent areas
+
+version 1.3
+-----------
+ - significant changes to the algorithm: use of variance
+   to find largest dimension and to split most varying boxes
+ - use of premultiplied alpha for color blending
+ - conversion of output to gamma 2.2
+
+version 1.2
+-----------
+ - color computation done in floating point
+ - gamma correction applied
+ - dropped support for very old systems & compilers
+
+version 1.1
+-----------
+ - alpha-sensitive color reduction and dithering
+ - support -- and - arguments in command line
+ - number of colors optional (defaults to 256)
+ - increased maximum number of colors in histogram
+
+version 1.0
+-----------
+ - cleaned up Makefile.unx (better gcc optimizations, "clean" target)
+ - recompiled binaries with zlib 1.1.4
+
+version 0.95
+------------
+ - fixed Win32 filter bug (binary mode for stdin/stdout)
+ - fixed cosmetic "choosing colors" verbosity buglet
+ - fixed palette-size bug when number of colors in image < number requested
+ - fixed sample-depth bug (png_set_packing() not retroactively smart)
+
+version 0.91
+------------
+ - fixed some verbose/non-verbose oopers
+ - fixed Win32 (MSVC) portability issues (getpid(), random(), srandom())
+ - added Makefile.w32 for MSVC (tested with 5.0)
+
+version 0.90
+------------
+ - added support for multiple files on command line
+ - changed stdin support to write PNG stream to stdout (not "stdin-fs8.png")
+
+version 0.75
+------------
+ - added support for any type of input file [Glenn Randers-Pehrson]
+ - fixed palette-(re)scaling bug
+ - added -verbose and -quiet options (default now is -quiet)
+ - added palette-remapping to minimize size of tRNS chunk
+ - made Floyd-Steinberg dithering default
+ - changed output naming scheme to -fs8.png and -or8.png (FS or ordered dither)
+
+version 0.70
+------------
+ - first public release
diff --git a/gst/dvbsubenc/libimagequant/COPYRIGHT b/gst/dvbsubenc/libimagequant/COPYRIGHT
new file mode 100644
index 000000000..98de36b4d
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/COPYRIGHT
@@ -0,0 +1,36 @@
+© 1997-2002 by Greg Roelofs; based on an idea by Stefan Schneider.
+© 2009-2014 by Kornel Lesiński.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+© 1989, 1991 by Jef Poskanzer.
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted, provided
+that the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.  This software is provided "as is" without express or
+implied warranty.
diff --git a/gst/dvbsubenc/libimagequant/README.md b/gst/dvbsubenc/libimagequant/README.md
new file mode 100644
index 000000000..79d956666
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/README.md
@@ -0,0 +1,490 @@
+# libimagequant—Image Quantization Library
+
+Small, portable C library for high-quality conversion of RGBA images to 8-bit indexed-color (palette) images.
+It's powering [pngquant2](http://pngquant.org).
+
+## License
+
+[BSD](https://raw.github.com/pornel/pngquant/master/lib/COPYRIGHT).
+It can be linked with both free and closed-source software.
+
+## Download
+
+The [library](http://pngquant.org/lib) is currently a part of the [pngquant2 project](https://github.com/pornel/pngquant/tree/master/lib).
+
+Files needed for the library are only in the `lib/` directory inside the repository (and you can ignore the rest).
+
+## Compiling and Linking
+
+The library can be linked with ANSI C and C++ programs. It has no external dependencies.
+
+To build on Unix-like systems run:
+
+    make -C lib
+
+it will create `lib/libimagequant.a` which you can link with your program.
+
+    gcc yourprogram.c /path/to/lib/libimagequant.a
+
+On BSD, use `gmake` (GNU make) rather than the native `make`.
+
+Alternatively you can compile the library with your program simply by including all `.c` files (and define `NDEBUG` to get a fast version):
+
+    gcc -std=c99 -O3 -DNDEBUG lib/*.c yourprogram.c
+
+### Compiling on Windows/Visual Studio
+
+The library can be compiled with any C compiler that has at least basic support for C99 (GCC, clang, ICC, C++ Builder, even Tiny C Compiler), but Visual Studio 2012 and older are not up to date with the 1999 C standard. There are 2 options for using `libimagequant` on Windows:
+
+ * Use Visual Studio **2013** (MSVC 18) and an [MSVC-compatible branch of the library](https://github.com/pornel/pngquant/tree/msvc/lib)
+ * Or use GCC from [MinGW](http://www.mingw.org). Use GCC to build `libimagequant.a` (using the instructions above for Unix) and add it along with `libgcc.a` (shipped with the MinGW compiler) to your VC project.
+
+## Overview
+
+The basic flow is:
+
+1. Create attributes object and configure the library.
+2. Create image object from RGBA bitmap or data source.
+3. Perform quantization (generate palette).
+4. Store remapped image and final palette.
+5. Free memory.
+
+Please note that libimagequant only handles raw uncompressed bitmaps in memory and is completely independent of any file format.
+
+<p>
+
+    #include "lib/libimagequant.h"
+
+    liq_attr *attr = liq_attr_create();
+    liq_image *image = liq_image_create_rgba(attr, bitmap, width, height, 0);
+    liq_result *res = liq_quantize_image(attr, image);
+
+    liq_write_remapped_image(res, image, bitmap, bitmap_size);
+    const liq_palette *pal = liq_get_palette(res);
+
+    // use image and palette here
+
+    liq_attr_destroy(attr);
+    liq_image_destroy(image);
+    liq_result_destroy(res);
+
+Functions returning `liq_error` return `LIQ_OK` (`0`) on success and non-zero on error.
+
+It's safe to pass `NULL` to any function accepting `liq_attr`, `liq_image`, `liq_result` (in that case the error code `LIQ_INVALID_POINTER` will be returned). These objects can be reused multiple times.
+
+There are 3 ways to create an image object for quantization:
+
+  * `liq_image_create_rgba()` for simple, contiguous RGBA bitmaps (width×height×4 bytes large array).
+  * `liq_image_create_rgba_rows()` for non-contiguous RGBA bitmaps (that have padding between rows or reverse order, e.g. BMP).
+  * `liq_image_create_custom()` for RGB, ABGR, YUV and all other formats that can be converted on-the-fly to RGBA (you have to supply the conversion function).
+
+Note that "image" here means raw uncompressed pixels. If you have a compressed image file, such as PNG, you must use another library (e.g. libpng or lodepng) to decode it first.
+
+## Functions
+
+----
+
+    liq_attr* liq_attr_create(void);
+
+Returns object that will hold initial settings (attributes) for the library. The object should be freed using `liq_attr_destroy()` after it's no longer needed.
+
+Returns `NULL` in the unlikely case that the library cannot run on the current machine (e.g. the library has been compiled for SSE-capable x86 CPU and run on VIA C3 CPU).
+
+----
+
+    liq_error liq_set_max_colors(liq_attr* attr, int colors);
+
+Specifies maximum number of colors to use. The default is 256. Instead of setting a fixed limit it's better to use `liq_set_quality()`.
+
+Returns `LIQ_VALUE_OUT_OF_RANGE` if number of colors is outside the range 2-256.
+
+----
+
+    int liq_get_max_colors(liq_attr* attr);
+
+Returns the value set by `liq_set_max_colors()`.
+
+----
+
+    liq_error liq_set_quality(liq_attr* attr, int minimum, int maximum);
+
+Quality is in the range `0` (worst) to `100` (best) and values are analogous to JPEG quality (i.e. `80` is usually good enough).
+
+Quantization will attempt to use the lowest number of colors needed to achieve `maximum` quality. `maximum` value of `100` is the default and means conversion as good as possible.
+
+If it's not possible to convert the image with at least `minimum` quality (i.e. 256 colors are not enough to meet the minimum quality), then `liq_quantize_image()` will fail. The default minimum is `0` (proceeds regardless of quality).
+
+Quality measures how well the generated palette fits image given to `liq_quantize_image()`. If a different image is remapped with `liq_write_remapped_image()` then actual quality may be different.
+
+Regardless of the quality settings the number of colors won't exceed the maximum (see `liq_set_max_colors()`).
+
+Returns `LIQ_VALUE_OUT_OF_RANGE` if `maximum` is lower than `minimum` or either of them is outside the 0-100 range.
+Returns `LIQ_INVALID_POINTER` if `attr` appears to be invalid.
+
+    liq_attr *attr = liq_attr_create();
+    liq_set_quality(attr, 50, 80); // use quality 80 if possible. Give up if quality drops below 50.
+
+----
+
+    int liq_get_min_quality(liq_attr* attr);
+
+Returns the lower bound set by `liq_set_quality()`.
+
+----
+
+    int liq_get_max_quality(liq_attr* attr);
+
+Returns the upper bound set by `liq_set_quality()`.
+
+----
+
+    liq_image *liq_image_create_rgba(liq_attr *attr, void* bitmap, int width, int height, double gamma);
+
+Creates image object that represents a bitmap later used for quantization and remapping. The bitmap must be contiguous run of RGBA pixels (alpha is the last component, 0 = transparent, 255 = opaque).
+
+The bitmap must not be modified or freed until this object is freed with `liq_image_destroy()`. See also `liq_image_set_memory_ownership()`.
+
+`width` and `height` are dimensions in pixels. A 10x10 pixel image needs a 400-byte bitmap.
+
+`gamma` can be `0` for images with the typical 1/2.2 [gamma](http://en.wikipedia.org/wiki/Gamma_correction).
+Otherwise `gamma` must be > 0 and < 1, e.g. `0.45455` (1/2.2) or `0.55555` (1/1.8). Generated palette will use the same gamma unless `liq_set_output_gamma()` is used. If `liq_set_output_gamma` is not used, then it only affects whether brighter or darker areas of the image will get more palette colors allocated.
+
+Returns `NULL` on failure, e.g. if `bitmap` is `NULL` or `width`/`height` is <= 0.
+
+----
+
+    liq_image *liq_image_create_rgba_rows(liq_attr *attr, void* rows[], int width, int height, double gamma);
+
+Same as `liq_image_create_rgba()`, but takes array of pointers to rows in the bitmap. This allows defining bitmaps with reversed rows (like in BMP), "stride" different than width or using only fragment of a larger bitmap, etc.
+
+`rows` array must have at least `height` elements and each row must be at least `width` RGBA pixels wide.
+
+    unsigned char *bitmap = …;
+    void **rows = malloc(height * sizeof(void*));
+    int bytes_per_row = width * 4 + padding; // stride
+    for(int i=0; i < height; i++) {
+        rows[i] = bitmap + i * bytes_per_row;
+    }
+    liq_image *img = liq_image_create_rgba_rows(attr, rows, width, height, 0);
+    // …
+    liq_image_destroy(img);
+    free(rows);
+
+The row pointers and bitmap must not be modified or freed until this object is freed with `liq_image_destroy()` (you can change that with `liq_image_set_memory_ownership()`).
+
+See also `liq_image_create_rgba()` and `liq_image_create_custom()`.
+
+----
+
+    liq_result *liq_quantize_image(liq_attr *attr, liq_image *input_image);
+
+Performs quantization (palette generation) based on settings in `attr` and pixels of the image.
+
+Returns `NULL` if quantization fails, e.g. due to limit set in `liq_set_quality()`.
+
+See `liq_write_remapped_image()`.
+
+----
+
+    liq_error liq_set_dithering_level(liq_result *res, float dither_level);
+
+Enables/disables dithering in `liq_write_remapped_image()`. Dithering level must be between `0` and `1` (inclusive). Dithering level `0` enables fast non-dithered remapping. Otherwise a variation of Floyd-Steinberg error diffusion is used.
+
+Precision of the dithering algorithm depends on the speed setting, see `liq_set_speed()`.
+
+Returns `LIQ_VALUE_OUT_OF_RANGE` if the dithering level is outside the 0-1 range.
+
+----
+
+    liq_error liq_write_remapped_image(liq_result *result, liq_image *input_image, void *buffer, size_t buffer_size);
+
+Remaps the image to palette and writes its pixels to the given buffer, 1 pixel per byte. Buffer must be large enough to fit the entire image, i.e. width×height bytes large. For safety, pass size of the buffer as `buffer_size`.
+
+For best performance call `liq_get_palette()` *after* this function, as palette is improved during remapping.
+
+Returns `LIQ_BUFFER_TOO_SMALL` if given size of the buffer is not enough to fit the entire image.
+
+    int buffer_size = width*height;
+    char *buffer = malloc(buffer_size);
+    if (LIQ_OK == liq_write_remapped_image(result, input_image, buffer, buffer_size)) {
+        const liq_palette *pal = liq_get_palette(result);
+        // save image
+    }
+
+See `liq_get_palette()` and `liq_write_remapped_image_rows()`.
+
+Please note that it only writes raw uncompressed pixels to memory. It does not perform any compression. If you'd like to create a PNG file then you need to pass the raw pixel data to another library, e.g. libpng or lodepng. See `rwpng.c` in `pngquant` project for an example how to do that.
+
+----
+
+    const liq_palette *liq_get_palette(liq_result *result);
+
+Returns pointer to palette optimized for image that has been quantized or remapped (final refinements are applied to the palette during remapping).
+
+It's valid to call this method before remapping, if you don't plan to remap any images or want to use same palette for multiple images.
+
+`liq_palette->count` contains number of colors (up to 256), `liq_palette->entries[n]` contains RGBA value for nth palette color.
+
+The palette is **temporary and read-only**. You must copy the palette elsewhere *before* calling `liq_result_destroy()`.
+
+Returns `NULL` on error.
+
+----
+
+    void liq_attr_destroy(liq_attr *);
+    void liq_image_destroy(liq_image *);
+    void liq_result_destroy(liq_result *);
+
+Releases memory owned by the given object. Object must not be used any more after it has been freed.
+
+Freeing `liq_result` also frees any `liq_palette` obtained from it.
+
+## Advanced Functions
+
+----
+
+    liq_error liq_set_speed(liq_attr* attr, int speed);
+
+Higher speed levels disable expensive algorithms and reduce quantization precision. The default speed is `3`. Speed `1` gives marginally better quality at significant CPU cost. Speed `10` has usually 5% lower quality, but is 8 times faster than the default.
+
+High speeds combined with `liq_set_quality()` will use more colors than necessary and will be less likely to meet minimum required quality.
+
+<table><caption>Features dependent on speed</caption>
+<tr><th>Noise-sensitive dithering</th><td>speed 1 to 5</td></tr>
+<tr><th>Forced posterization</th><td>8-10 or if image has more than million colors</td></tr>
+<tr><th>Quantization error known</th><td>1-7 or if minimum quality is set</td></tr>
+<tr><th>Additional quantization techniques</th><td>1-6</td></tr>
+</table>
+
+Returns `LIQ_VALUE_OUT_OF_RANGE` if the speed is outside the 1-10 range.
+
+----
+
+    int liq_get_speed(liq_attr* attr);
+
+Returns the value set by `liq_set_speed()`.
+
+----
+
+    liq_error liq_set_min_opacity(liq_attr* attr, int min);
+
+Alpha values higher than this will be rounded to opaque. This is a workaround for Internet Explorer 6 that truncates semitransparent values to completely transparent. The default is `255` (no change). 238 is a suggested value.
+
+Returns `LIQ_VALUE_OUT_OF_RANGE` if the value is outside the 0-255 range.
+
+----
+
+    int liq_get_min_opacity(liq_attr* attr);
+
+Returns the value set by `liq_set_min_opacity()`.
+
+----
+
+    liq_error liq_set_min_posterization(liq_attr* attr, int bits);
+
+Ignores given number of least significant bits in all channels, posterizing image to `2^bits` levels. `0` gives full quality. Use `2` for VGA or 16-bit RGB565 displays, `4` if image is going to be output on a RGB444/RGBA4444 display (e.g. low-quality textures on Android).
+
+Returns `LIQ_VALUE_OUT_OF_RANGE` if the value is outside the 0-4 range.
+
+----
+
+    int liq_get_min_posterization(liq_attr* attr);
+
+Returns the value set by `liq_set_min_posterization()`.
+
+----
+
+    liq_set_last_index_transparent(liq_attr* attr, int is_last);
+
+`0` (default) makes alpha colors sorted before opaque colors. Non-`0` mixes colors together except completely transparent color, which is moved to the end of the palette. This is a workaround for programs that blindly assume the last palette entry is transparent.
+
+----
+
+    liq_image *liq_image_create_custom(liq_attr *attr, liq_image_get_rgba_row_callback *row_callback, void *user_info, int width, int height, double gamma);
+
+<p>
+
+    void image_get_rgba_row_callback(liq_color row_out[], int row_index, int width, void *user_info) {
+        for(int column_index=0; column_index < width; column_index++) {
+            row_out[column_index] = /* generate pixel at (row_index, column_index) */;
+        }
+    }
+
+Creates image object that will use callback to read image data. This allows on-the-fly conversion of images that are not in the RGBA color space.
+
+`user_info` value will be passed to the callback. It may be useful for storing pointer to program's internal representation of the image.
+
+The callback must read/generate the `row_index`-th row and write its RGBA pixels to the `row_out` array. Row `width` is given for convenience and will always be equal to the image width.
+
+The callback will be called multiple times for each row. Quantization and remapping require at least two full passes over image data, so caching of callback's work makes no sense — in such case it's better to convert entire image and use `liq_image_create_rgba()` instead.
+
+To use RGB image:
+
+    void rgb_to_rgba_callback(liq_color row_out[], int row_index, int width, void *user_info) {
+        unsigned char *rgb_row = ((unsigned char *)user_info) + 3*width*row_index;
+
+        for(int i=0; i < width; i++) {
+            row_out[i].r = rgb_row[i*3];
+            row_out[i].g = rgb_row[i*3+1];
+            row_out[i].b = rgb_row[i*3+2];
+            row_out[i].a = 255;
+        }
+    }
+    liq_image *img = liq_image_create_custom(attr, rgb_to_rgba_callback, rgb_bitmap, width, height, 0);
+
+The library doesn't support RGB bitmaps "natively", because supporting only single format allows compiler to inline more code, 4-byte pixel alignment is faster, and SSE instructions operate on 4 values at once, so alpha support is almost free.
+
+----
+
+    liq_error liq_image_set_memory_ownership(liq_image *image, int ownership_flags);
+
+Passes ownership of bitmap and/or rows memory to the `liq_image` object, so you don't have to free it yourself. Memory owned by the object will be freed at its discretion with `free` function specified in `liq_attr_create_with_allocator()` (by default it's stdlib's `free()`).
+
+* `LIQ_OWN_PIXELS` makes bitmap owned by the object. The bitmap will be freed automatically at any point when it's no longer needed. If you set this flag you must **not** free the bitmap yourself. If the image has been created with `liq_image_create_rgba_rows()` then the bitmap address is assumed to be the lowest address of any row.
+
+* `LIQ_OWN_ROWS` makes array of row pointers (but not bitmap pointed by these rows) owned by the object. Rows will be freed when object is deallocated. If you set this flag you must **not** free the rows array yourself. This flag is valid only if the object has been created with `liq_image_create_rgba_rows()`.
+
+These flags can be combined with binary *or*, i.e. `LIQ_OWN_PIXELS | LIQ_OWN_ROWS`.
+
+This function must not be used if the image has been created with `liq_image_create_custom()`.
+
+Returns `LIQ_VALUE_OUT_OF_RANGE` if invalid flags are specified or image is not backed by a bitmap.
+
+----
+
+    liq_error liq_write_remapped_image_rows(liq_result *result, liq_image *input_image, unsigned char **row_pointers);
+
+Similar to `liq_write_remapped_image()`. Writes remapped image, at 1 byte per pixel, to each row pointed by `row_pointers` array. The array must have at least as many elements as height of the image, and each row must have at least as many bytes as width of the image. Rows must not overlap.
+
+For best performance call `liq_get_palette()` *after* this function, as remapping may change the palette.
+
+Returns `LIQ_INVALID_POINTER` if `result` or `input_image` is `NULL`.
+
+----
+
+    double liq_get_quantization_error(liq_result *result);
+
+Returns mean square error of quantization (square of difference between pixel values in the original image and remapped image). Alpha channel and gamma correction are taken into account, so the result isn't exactly the mean square error of all channels.
+
+For most images MSE 1-5 is excellent. 7-10 is OK. 20-30 will have noticeable errors. 100 is awful.
+
+This function should be called *after* `liq_write_remapped_image()`. It may return `-1` if the value is not available (this is affected by `liq_set_speed()` and `liq_set_quality()`).
+
+----
+
+    double liq_get_quantization_quality(liq_result *result);
+
+Analogous to `liq_get_quantization_error()`, but returns quantization error as quality value in the same 0-100 range that is used by `liq_set_quality()`.
+
+This function should be called *after* `liq_write_remapped_image()`. It may return `-1` if the value is not available (this is affected by `liq_set_speed()` and `liq_set_quality()`).
+
+This function can be used to add upper limit to quality options presented to the user, e.g.
+
+    liq_attr *attr = liq_attr_create();
+    liq_image *img = liq_image_create_rgba(…);
+    liq_result *res = liq_quantize_image(attr, img);
+    int max_attainable_quality = liq_get_quantization_quality(res);
+    printf("Please select quality between 0 and %d: ", max_attainable_quality);
+    int user_selected_quality = prompt();
+    if (user_selected_quality < max_attainable_quality) {
+        liq_set_quality(attr, 0, user_selected_quality);
+        liq_result_destroy(res);
+        res = liq_quantize_image(attr, img);
+    }
+    liq_write_remapped_image(…);
+
+----
+
+    void liq_set_log_callback(liq_attr*, liq_log_callback_function*, void *user_info);
+
+<p>
+
+    void log_callback_function(const liq_attr*, const char *message, void *user_info) {}
+
+----
+
+    void liq_set_log_flush_callback(liq_attr*, liq_log_flush_callback_function*, void *user_info);
+<p>
+
+    void log_flush_callback_function(const liq_attr*, void *user_info) {}
+
+Sets up callback function to be called when the library reports work progress or errors. The callback must not call any library functions.
+
+`user_info` value will be passed to the callback.
+
+`NULL` callback clears the current callback.
+
+In the log callback the `message` is a zero-terminated string containing informative message to output. It is valid only until the callback returns.
+
+`liq_set_log_flush_callback()` sets up callback function that will be called after the last log callback, which can be used to flush buffers and free resources used by the log callback.
+
+----
+
+    liq_attr* liq_attr_create_with_allocator(void* (*malloc)(size_t), void (*free)(void*));
+
+Same as `liq_attr_create`, but uses given `malloc` and `free` replacements to allocate all memory used by the library.
+
+The `malloc` function must return 16-byte aligned memory on x86 (and on other architectures memory aligned for `double` and pointers). Conversely, if your stdlib's `malloc` doesn't return appropriately aligned memory, you should use this function to provide aligned replacements.
+
+----
+
+    liq_attr* liq_attr_copy(liq_attr *orig);
+
+Creates an independent copy of `liq_attr`. The copy should also be freed using `liq_attr_destroy()`.
+
+---
+
+    liq_error liq_set_output_gamma(liq_result* res, double gamma);
+
+Sets gamma correction for generated palette and remapped image. Must be > 0 and < 1, e.g. `0.45455` for gamma 1/2.2 in PNG images. By default output gamma is same as gamma of the input image.
+
+----
+
+    int liq_image_get_width(const liq_image *img);
+    int liq_image_get_height(const liq_image *img);
+    double liq_get_output_gamma(const liq_result *result);
+
+Getters for `width`, `height` and `gamma` of the input image.
+
+If the input is invalid, these all return -1.
+
+---
+
+    liq_error liq_image_add_fixed_color(liq_image* img, liq_color color);
+
+Reserves a color in the output palette created from this image. It behaves as if the given color was used in the image and was very important.
+
+RGB values of `liq_color` are assumed to have the same gamma as the image.
+
+It must be called before the image is quantized.
+
+Returns error if more than 256 colors are added. If image is quantized to fewer colors than the number of fixed colors added, then excess fixed colors will be ignored.
+
+---
+
+    int liq_version();
+
+Returns version of the library as an integer. Same as `LIQ_VERSION`. Human-readable version is defined as `LIQ_VERSION_STRING`.
+
+## Multithreading
+
+The library is stateless and doesn't use any global or thread-local storage. It doesn't use any locks.
+
+* Different threads can perform unrelated quantizations/remappings at the same time (e.g. each thread working on a different image).
+* The same `liq_attr`, `liq_result`, etc. can be accessed from different threads, but not at the same time (e.g. you can create `liq_attr` in one thread and free it in another).
+
+The library needs to sort unique colors present in the image. Although the sorting algorithm does a few things to keep stack usage minimal in typical cases, there is no guarantee against extremely degenerate cases, so threads should have an automatically growing stack.
+
+### OpenMP
+
+The library will parallelize some operations if compiled with OpenMP.
+
+You must not increase number of maximum threads after `liq_image` has been created, as it allocates some per-thread buffers.
+
+Callback of `liq_image_create_custom()` may be called from different threads at the same time.
+
+## Acknowledgements
+
+Thanks to Irfan Skiljan for helping test the first version of the library.
+
+The library is developed by [Kornel Lesiński](mailto:kornel@pngquant.org).
diff --git a/gst/dvbsubenc/libimagequant/blur.c b/gst/dvbsubenc/libimagequant/blur.c
new file mode 100644
index 000000000..1e711dd27
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/blur.c
@@ -0,0 +1,131 @@
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "blur.h"
+
+/* Box-blurs every row of src horizontally and writes the result transposed, i.e. to dst[i * height + j]
+ (called twice, with width and height swapped the second time, gives 2d blur). Each output is the mean of the
+ 2*size samples row[i-size+1 .. i+size]; indices outside the row are clamped to the edge pixel. */
+static void
+transposing_1d_blur (unsigned char *restrict src, unsigned char *restrict dst,
+    unsigned int width, unsigned int height, const unsigned int size)
+{
+  for (unsigned int j = 0; j < height; j++) {
+    unsigned char *restrict row = src + j * width;
+    // NOTE(review): row[2*size-1] is read unconditionally and `width - size` is unsigned, so width must be >= 2*size
+    // initial window: size copies of row[0] standing in for the pixels left of the line, plus row[0..size-1]
+    unsigned int sum;
+    sum = row[0] * size;
+    for (unsigned int i = 0; i < size; i++) {
+      sum += row[i];
+    }
+
+    // blur with left side outside line: the sample leaving the window is a clamped copy of row[0]
+    for (unsigned int i = 0; i < size; i++) {
+      sum -= row[0];
+      sum += row[i + size];
+
+      dst[i * height + j] = sum / (size * 2);   // divisor matches the 2*size samples held in sum
+    }
+    // interior: both the leaving and the entering sample are real pixels of this row
+    for (unsigned int i = size; i < width - size; i++) {
+      sum -= row[i - size];
+      sum += row[i + size];
+
+      dst[i * height + j] = sum / (size * 2);
+    }
+
+    // blur with right side outside line: the entering sample is a clamped copy of row[width - 1]
+    for (unsigned int i = width - size; i < width; i++) {
+      sum -= row[i - size];
+      sum += row[width - 1];
+
+      dst[i * height + j] = sum / (size * 2);
+    }
+  }
+}
+
+/** Picks maximum of neighboring pixels (blur + lighten): each dst pixel is the largest of the src pixel,
+ * its left and right neighbours and the pixels directly above and below it. A neighbour that would fall
+ * outside the image is replaced by the nearest edge pixel. dst is filled row by row (not transposed). */
+LIQ_PRIVATE void
+liq_max3 (unsigned char *src, unsigned char *dst, unsigned int width,
+    unsigned int height)
+{
+  unsigned int i, j;
+  // NOTE(review): width is unsigned, so width == 0 would wrap the `width - 1` bounds below; assumes width >= 1
+  for (j = 0; j < height; j++) {
+    const unsigned char *row = src + j * width;
+    unsigned char t1, t2;
+    unsigned char prev, curr, next;
+    // rows above and below, clamped at the image border (for j <= 1 the row above is row 0)
+    const unsigned char *prevrow = src + (j > 1 ? j - 1 : 0) * width;
+    const unsigned char *nextrow = src + MIN (height - 1, j + 1) * width;
+    // seed the sliding window with row[0] so the first pixel sees row[0] as its left neighbour
+    curr = row[0];
+    next = row[0];
+
+    for (i = 0; i < width - 1; i++) {
+      prev = curr;
+      curr = next;              // curr == row[i]
+      next = row[i + 1];
+
+      t1 = MAX (prev, next);    // horizontal neighbours
+      t2 = MAX (nextrow[i], prevrow[i]);        // vertical neighbours
+      *dst++ = MAX (curr, MAX (t1, t2));
+    }
+    // last column has no right neighbour: when width > 1, curr is row[width - 2] and next is row[width - 1]
+    t1 = MAX (curr, next);
+    t2 = MAX (nextrow[width - 1], prevrow[width - 1]);
+    *dst++ = MAX (t1, t2);
+  }
+}
+
+/** Picks minimum of neighboring pixels (blur + darken): each dst pixel is the smallest of the src pixel,
+ * its left and right neighbours and the pixels directly above and below it. A neighbour that would fall
+ * outside the image is replaced by the nearest edge pixel. dst is filled row by row (not transposed). */
+LIQ_PRIVATE void
+liq_min3 (unsigned char *src, unsigned char *dst, unsigned int width,
+    unsigned int height)
+{
+  unsigned int j;
+  // NOTE(review): width is unsigned, so width == 0 would wrap the `width - 1` bounds below; assumes width >= 1
+  for (j = 0; j < height; j++) {
+    unsigned char t1, t2;
+    const unsigned char *row = src + j * width,
+        *prevrow = src + (j > 1 ? j - 1 : 0) * width,
+        *nextrow = src + MIN (height - 1, j + 1) * width;
+    // seed the sliding window with row[0] so the first pixel sees row[0] as its left neighbour
+    unsigned char prev, curr = row[0], next = row[0];
+
+    for (unsigned int i = 0; i < width - 1; i++) {
+      prev = curr;
+      curr = next;              // curr == row[i]
+      next = row[i + 1];
+
+      t1 = MIN (prev, next);    // horizontal neighbours
+      t2 = MIN (nextrow[i], prevrow[i]);        // vertical neighbours (rows clamped at the image border)
+      *dst++ = MIN (curr, MIN (t1, t2));
+    }
+    // last column has no right neighbour: when width > 1, curr is row[width - 2] and next is row[width - 1]
+    t1 = MIN (curr, next);
+    t2 = MIN (nextrow[width - 1], prevrow[width - 1]);
+    *dst++ = MIN (t1, t2);
+  }
+}
+
+/*
+ Box-blurs the src image into dst using two transposing 1D passes; tmp receives the transposed intermediate
+ and is overwritten. src, tmp and dst are each indexed as width*height one-byte pixels. Size controls radius of box blur
+ and must be > 0 (asserted). If width or height is below 2*size+1 the function returns without writing tmp or dst. */
+LIQ_PRIVATE void
+liq_blur (unsigned char *src, unsigned char *tmp, unsigned char *dst,
+    unsigned int width, unsigned int height, unsigned int size)
+{
+  assert (size > 0);
+  if (width < 2 * size + 1 || height < 2 * size + 1) {
+    return;                     // image too small for this radius: nothing is written
+  }
+  transposing_1d_blur (src, tmp, width, height, size);  // horizontal pass, result stored transposed in tmp
+  transposing_1d_blur (tmp, dst, height, width, size);  // second pass on the transposed data restores the layout
+}
diff --git a/gst/dvbsubenc/libimagequant/blur.h b/gst/dvbsubenc/libimagequant/blur.h
new file mode 100644
index 000000000..06ae8cb49
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/blur.h
@@ -0,0 +1,4 @@
+
+LIQ_PRIVATE void liq_blur(unsigned char *src, unsigned char *tmp, unsigned char *dst, unsigned int width, unsigned int height, unsigned int size); /* box blur of src into dst; tmp receives the intermediate pass */
+LIQ_PRIVATE void liq_max3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height); /* per-pixel maximum of the pixel and its left/right/upper/lower neighbours */
+LIQ_PRIVATE void liq_min3(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height); /* per-pixel minimum of the pixel and its left/right/upper/lower neighbours */
diff --git a/gst/dvbsubenc/libimagequant/libimagequant.c b/gst/dvbsubenc/libimagequant/libimagequant.c
new file mode 100644
index 000000000..1eaccbf9e
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/libimagequant.c
@@ -0,0 +1,2054 @@
+/* pngquant.c - quantize the colors in an alphamap down to a specified number
+**
+** Copyright (C) 1989, 1991 by Jef Poskanzer.
+** Copyright (C) 1997, 2000, 2002 by Greg Roelofs; based on an idea by
+**                                Stefan Schneider.
+** © 2009-2013 by Kornel Lesinski.
+**
+** Permission to use, copy, modify, and distribute this software and its
+** documentation for any purpose and without fee is hereby granted, provided
+** that the above copyright notice appear in all copies and that both that
+** copyright notice and this permission notice appear in supporting
+** documentation.  This software is provided "as is" without express or
+** implied warranty.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <limits.h>
+
+#if !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) && !(defined(_MSC_VER) && _MSC_VER >= 1800)
+#error "This program requires C99, e.g. -std=c99 switch in GCC or it requires MSVC 18.0 or higher."
+#error "Ignore torrent of syntax errors that may follow. It's only because compiler is set to use too old C version."
+#endif
+
+#ifdef _OPENMP
+#include <omp.h>
+#else
+#define omp_get_max_threads() 1
+#define omp_get_thread_num() 0
+#endif
+
+#include "libimagequant.h"
+
+#include "pam.h"
+#include "mediancut.h"
+#include "nearest.h"
+#include "blur.h"
+#include "viter.h"
+
+#define LIQ_HIGH_MEMORY_LIMIT (1<<26)   /* avoid allocating buffers larger than 64MB */
+
+// each structure has a pointer as a unique identifier that allows type checking at run time.
+// The identifiers are compared by address (==), not by string content, and
+// liq_freed_magic marks a handle that has already been destroyed.
+static const char *const liq_attr_magic = "liq_attr", *const liq_image_magic =
+    "liq_image", *const liq_result_magic =
+    "liq_result", *const liq_remapping_result_magic =
+    "liq_remapping_result", *const liq_freed_magic = "free";
+// true when attr is a non-NULL handle whose magic_header is kind##_magic; aborts when the handle was already freed
+#define CHECK_STRUCT_TYPE(attr, kind) liq_crash_if_invalid_handle_pointer_given((const liq_attr*)attr, kind ## _magic)
+// true for a non-NULL pointer, after forcing one read through it
+#define CHECK_USER_POINTER(ptr) liq_crash_if_invalid_pointer_given(ptr)
+
+/*
+ * Quantization settings handle. magic_header must stay the first member:
+ * CHECK_STRUCT_TYPE reads it through a liq_attr pointer for every handle type.
+ */
+struct liq_attr
+{
+  const char *magic_header;     /* liq_attr_magic while alive, liq_freed_magic after destroy */
+  void *(*malloc) (size_t);     /* allocator pair chosen in liq_attr_create_with_allocator() */
+  void (*free) (void *);
+
+  double target_mse, max_mse, voronoi_iteration_limit;  /* MSE limits come from liq_set_quality(), the limit from liq_set_speed() */
+  float min_opaque_val;         /* stored as min/255 by liq_set_min_opacity() */
+  unsigned int max_colors, max_histogram_entries;
+  unsigned int min_posterization_output /* user setting */ ,
+      min_posterization_input /* speed setting */ ;
+  unsigned int voronoi_iterations, feedback_loop_trials;
+  bool last_index_transparent, use_contrast_maps, use_dither_map, fast_palette;
+  unsigned int speed;           /* 1..10, see liq_set_speed() */
+  liq_log_callback_function *log_callback;      /* may be NULL: logging disabled */
+  void *log_callback_user_info;
+  liq_log_flush_callback_function *log_flush_callback;  /* may be NULL */
+  void *log_flush_callback_user_info;
+};
+
+/*
+ * Input image handle. Pixels come either from the rows array or from
+ * row_callback; converted float pixels are cached in f_pixels or produced
+ * one row at a time in temp_f_row.
+ */
+struct liq_image
+{
+  const char *magic_header;     /* liq_image_magic while alive */
+  void *(*malloc) (size_t);     /* copied from the liq_attr that created the image */
+  void (*free) (void *);
+
+  f_pixel *f_pixels;            /* lazily filled by liq_image_get_row_f(), NULL until then */
+  rgba_pixel **rows;            /* NULL when the image is backed by row_callback */
+  double gamma;
+  unsigned int width, height;
+  unsigned char *noise, *edges, *dither_map;    /* freed in liq_image_destroy() when set */
+  rgba_pixel *pixels, *temp_row;        /* temp_row holds width * omp_get_max_threads() pixels */
+  f_pixel *temp_f_row;          /* per-thread scratch row, allocated by liq_image_use_low_memory() */
+  liq_image_get_rgba_row_callback *row_callback;
+  void *row_callback_user_info;
+  float min_opaque_val;         /* values below 1 force rows through temp_row and modify_alpha() */
+  f_pixel fixed_colors[256];
+  unsigned short fixed_colors_count;    /* number of used entries in fixed_colors */
+  bool free_pixels, free_rows, free_rows_internal;      /* ownership flags consulted when the image is destroyed */
+};
+
+/*
+ * Remapping state created from a liq_result by liq_remapping_result_create();
+ * it is owned by the liq_result and destroyed together with it or whenever
+ * gamma or dithering level of the result change.
+ */
+typedef struct liq_remapping_result
+{
+  const char *magic_header;     /* liq_remapping_result_magic while alive */
+  void *(*malloc) (size_t);
+  void (*free) (void *);
+
+  unsigned char *pixels;        /* freed in liq_remapping_result_destroy() when set */
+  colormap *palette;            /* private duplicate of the result palette */
+  liq_palette int_palette;
+  double gamma, palette_error;
+  float dither_level;
+  bool use_dither_map;
+} liq_remapping_result;
+
+/*
+ * Quantization result handle: the chosen palette plus the settings used
+ * later for remapping.
+ */
+struct liq_result
+{
+  const char *magic_header;     /* liq_result_magic while alive */
+  void *(*malloc) (size_t);
+  void (*free) (void *);
+
+  liq_remapping_result *remapping;      /* may be NULL; reset when gamma or dithering level change */
+  colormap *palette;
+  liq_palette int_palette;      /* filled on demand by liq_get_palette(); count == 0 means not computed yet */
+  float dither_level;
+  double gamma, palette_error;  /* a negative palette_error is treated as "unknown" by the getters */
+  int min_posterization_output;
+  bool use_dither_map, fast_palette;
+};
+
+static liq_result *pngquant_quantize (histogram * hist,
+    const liq_attr * options, const liq_image * img);
+static void modify_alpha (liq_image * input_image,
+    rgba_pixel * const row_pixels);
+static void contrast_maps (liq_image * image);
+static histogram *get_histogram (liq_image * input_image,
+    const liq_attr * options);
+static const rgba_pixel *liq_image_get_row_rgba (liq_image * input_image,
+    unsigned int row);
+static const f_pixel *liq_image_get_row_f (liq_image * input_image,
+    unsigned int row);
+static void liq_remapping_result_destroy (liq_remapping_result * result);
+
+/*
+ * printf-style logging helper. Formats the message into a stack buffer
+ * (g_alloca) and passes it to the log callback of context together with
+ * its user info. Does nothing when no log callback is installed or when
+ * the message cannot be formatted.
+ */
+static void
+liq_verbose_printf (const liq_attr * context, const char *fmt, ...)
+{
+  va_list va;
+  int required_space;
+  char *buf;
+
+  if (!context->log_callback) {
+    return;
+  }
+
+  /* first pass only measures the length of the formatted message */
+  va_start (va, fmt);
+  required_space = vsnprintf (NULL, 0, fmt, va);
+  va_end (va);
+
+  /* vsnprintf signals an encoding error with a negative value; without this
+   * check an uninitialised buffer would be handed to the callback */
+  if (required_space < 0) {
+    return;
+  }
+  required_space += 1;          // +\0
+
+  buf = g_alloca (required_space);
+
+  va_start (va, fmt);
+  vsnprintf (buf, required_space, fmt, va);
+  va_end (va);
+
+  context->log_callback (context, buf, context->log_callback_user_info);
+}
+
+/* Hands an already formatted message to the log callback, if one is set. */
+inline static void
+verbose_print (const liq_attr * attr, const char *msg)
+{
+  liq_log_callback_function *const log_cb = attr->log_callback;
+
+  if (!log_cb) {
+    return;
+  }
+  log_cb (attr, msg, attr->log_callback_user_info);
+}
+
+/* Invokes the log flush callback, if one is set. */
+static void
+liq_verbose_printf_flush (liq_attr * attr)
+{
+  liq_log_flush_callback_function *const flush_cb = attr->log_flush_callback;
+
+  if (!flush_cb) {
+    return;
+  }
+  flush_cb (attr, attr->log_flush_callback_user_info);
+}
+
+#if USE_SSE
+/* Tells whether SSE can be used: always on x86-64, otherwise asks cpuid. */
+inline static bool
+is_sse_available (void)
+{
+#if (defined(__x86_64__) || defined(__amd64))
+  return true;
+#else
+  int reg_a, reg_b, reg_c, reg_d;
+  const int sse_feature_bit = 1 << 25;  // edx bit 25 is set when SSE is present
+
+  cpuid (1, reg_a, reg_b, reg_c, reg_d);
+  return (reg_d & sse_feature_bit) != 0;
+#endif
+}
+#endif
+
+/*
+ * Run-time type check for handles. Kept out of line so that a crash caused
+ * by a user-supplied handle pointing at invalid memory is obvious in the
+ * backtrace. Returns false for NULL or a handle of another type, true for
+ * a matching handle, and aborts for a handle that was already destroyed.
+ */
+NEVER_INLINE LIQ_EXPORT bool liq_crash_if_invalid_handle_pointer_given (const
+    liq_attr * user_supplied_pointer, const char *const expected_magic_header);
+LIQ_EXPORT bool
+liq_crash_if_invalid_handle_pointer_given (const liq_attr *
+    user_supplied_pointer, const char *const expected_magic_header)
+{
+  const char *found_magic;
+
+  if (user_supplied_pointer == NULL) {
+    return false;
+  }
+
+  found_magic = user_supplied_pointer->magic_header;
+  if (found_magic == liq_freed_magic) {
+    // Use after free is a programmer error, not a recoverable condition:
+    // continuing could be a security problem, so crash loudly instead.
+    fprintf (stderr, "%s used after being freed", expected_magic_header);
+    abort ();
+  }
+
+  // magic headers are compared by address, not by content
+  return found_magic == expected_magic_header;
+}
+
+/*
+ * Returns false for NULL. For any other pointer one byte is read through it
+ * so that an invalid pointer crashes here, early, rather than somewhere deep
+ * inside the library; the function then returns true.
+ */
+NEVER_INLINE LIQ_EXPORT bool liq_crash_if_invalid_pointer_given (void *pointer);
+LIQ_EXPORT bool
+liq_crash_if_invalid_pointer_given (void *pointer)
+{
+  if (pointer == NULL) {
+    return false;
+  }
+
+  {
+    // volatile forces the read; the value itself is irrelevant and is only
+    // used in the result to keep the compiler quiet about an unused read
+    const char probed = *((volatile char *) pointer);
+    return probed || true;
+  }
+}
+
+static void
+liq_log_error (const liq_attr * attr, const char *msg)
+{
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr))
+    return;
+  liq_verbose_printf (attr, "  error: %s", msg);
+}
+
+/*
+ * Maps a 0..100 quality setting to a mean square error limit.
+ * Quality 0 yields MAX_DIFF and quality 100 yields 0.
+ */
+static double
+quality_to_mse (long quality)
+{
+  double low_quality_fudge, curve;
+
+  if (quality == 0) {
+    return MAX_DIFF;
+  }
+  if (quality == 100) {
+    return 0;
+  }
+
+  // extra allowance that only matters for the lowest qualities
+  low_quality_fudge = MAX (0, 0.016 / (0.001 + quality) - 0.001);
+  // curve fudged to be roughly similar to quality of libjpeg
+  curve = 2.5 / pow (210.0 + quality, 1.2) * (100.1 - quality) / 100.0;
+
+  return low_quality_fudge + curve;
+}
+
+/*
+ * Inverse of quality_to_mse(): returns the highest quality in 1..100 whose
+ * error limit is not exceeded by mse, or 0 when there is none.
+ */
+static unsigned int
+mse_to_quality (double mse)
+{
+  int quality = 100;
+
+  while (quality > 0) {
+    // + epsilon for floating point errors
+    if (mse <= quality_to_mse (quality) + 0.000001) {
+      return quality;
+    }
+    quality--;
+  }
+  return 0;
+}
+
+/*
+ * Sets the quality range. Requires 0 <= minimum <= target <= 100, otherwise
+ * LIQ_VALUE_OUT_OF_RANGE is returned and attr is left unchanged.
+ */
+LIQ_EXPORT liq_error
+liq_set_quality (liq_attr * attr, int minimum, int target)
+{
+  const bool target_ok = (target >= 0 && target <= 100);
+  const bool minimum_ok = (minimum >= 0 && minimum <= target);
+
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!target_ok || !minimum_ok) {
+    return LIQ_VALUE_OUT_OF_RANGE;
+  }
+
+  attr->target_mse = quality_to_mse (target);
+  attr->max_mse = quality_to_mse (minimum);
+  return LIQ_OK;
+}
+
+/* Returns the minimum quality derived from max_mse, or -1 for an invalid attr. */
+LIQ_EXPORT int
+liq_get_min_quality (const liq_attr * attr)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return mse_to_quality (attr->max_mse);
+  }
+  return -1;
+}
+
+/* Returns the target quality derived from target_mse, or -1 for an invalid attr. */
+LIQ_EXPORT int
+liq_get_max_quality (const liq_attr * attr)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return mse_to_quality (attr->target_mse);
+  }
+  return -1;
+}
+
+
+/* Sets the palette size limit; colors must be within 2..256. */
+LIQ_EXPORT liq_error
+liq_set_max_colors (liq_attr * attr, int colors)
+{
+  const bool in_range = (colors >= 2 && colors <= 256);
+
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!in_range) {
+    return LIQ_VALUE_OUT_OF_RANGE;
+  }
+
+  attr->max_colors = colors;
+  return LIQ_OK;
+}
+
+/* Returns the palette size limit, or -1 for an invalid attr. */
+LIQ_EXPORT int
+liq_get_max_colors (const liq_attr * attr)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return attr->max_colors;
+  }
+  return -1;
+}
+
+/* Sets the user-requested output posterization; bits must be within 0..4. */
+LIQ_EXPORT liq_error
+liq_set_min_posterization (liq_attr * attr, int bits)
+{
+  const bool in_range = (bits >= 0 && bits <= 4);
+
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!in_range) {
+    return LIQ_VALUE_OUT_OF_RANGE;
+  }
+
+  attr->min_posterization_output = bits;
+  return LIQ_OK;
+}
+
+/* Returns the user-requested output posterization, or -1 for an invalid attr. */
+LIQ_EXPORT int
+liq_get_min_posterization (const liq_attr * attr)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return attr->min_posterization_output;
+  }
+  return -1;
+}
+
+/*
+ * Selects the speed/quality trade-off (1 = slowest, 10 = fastest) and
+ * derives all speed-dependent settings of attr from it.
+ */
+LIQ_EXPORT liq_error
+liq_set_speed (liq_attr * attr, int speed)
+{
+  int base_iterations;
+
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!(speed >= 1 && speed <= 10)) {
+    return LIQ_VALUE_OUT_OF_RANGE;
+  }
+
+  // no voronoi iterations at all from speed 8 upwards
+  base_iterations = (speed < 8) ? 8 - speed : 0;
+  attr->voronoi_iterations =
+      base_iterations + base_iterations * base_iterations / 2;
+  attr->voronoi_iteration_limit = 1.0 / (double) (1 << (23 - speed));
+  attr->feedback_loop_trials = MAX (56 - 9 * speed, 0);
+
+  attr->max_histogram_entries = (1 << 17) + (1 << 18) * (10 - speed);
+  attr->min_posterization_input = (speed >= 8) ? 1 : 0;
+  attr->fast_palette = (speed >= 7);
+  // parallelized dither map might speed up floyd remapping
+  attr->use_dither_map = (speed <= (omp_get_max_threads () > 1 ? 7 : 5));
+  attr->use_contrast_maps = (speed <= 7) || attr->use_dither_map;
+  attr->speed = speed;
+  return LIQ_OK;
+}
+
+/* Returns the speed setting, or -1 for an invalid attr. */
+LIQ_EXPORT int
+liq_get_speed (const liq_attr * attr)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return attr->speed;
+  }
+  return -1;
+}
+
+/*
+ * Changes the output gamma of a result; gamma must be strictly between 0
+ * and 1. Any existing remapping is discarded.
+ */
+LIQ_EXPORT liq_error
+liq_set_output_gamma (liq_result * res, double gamma)
+{
+  liq_remapping_result *stale;
+
+  if (!CHECK_STRUCT_TYPE (res, liq_result)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (gamma <= 0 || gamma >= 1.0) {
+    return LIQ_VALUE_OUT_OF_RANGE;
+  }
+
+  stale = res->remapping;
+  if (stale) {
+    liq_remapping_result_destroy (stale);
+    res->remapping = NULL;
+  }
+
+  res->gamma = gamma;
+  return LIQ_OK;
+}
+
+/* Sets the minimum opacity from a 0..255 value, stored as a 0..1 fraction. */
+LIQ_EXPORT liq_error
+liq_set_min_opacity (liq_attr * attr, int min)
+{
+  const bool in_range = (min >= 0 && min <= 255);
+
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!in_range) {
+    return LIQ_VALUE_OUT_OF_RANGE;
+  }
+
+  attr->min_opaque_val = (double) min / 255.0;
+  return LIQ_OK;
+}
+
+/* Returns the minimum opacity scaled back to 0..255, or -1 for an invalid attr. */
+LIQ_EXPORT int
+liq_get_min_opacity (const liq_attr * attr)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    // scaled by 256 and capped so that 1.0 maps to 255
+    return MIN (255, 256.0 * attr->min_opaque_val);
+  }
+  return -1;
+}
+
+/* Any non-zero is_last asks for the transparent color to be put at the last palette index. */
+LIQ_EXPORT void
+liq_set_last_index_transparent (liq_attr * attr, int is_last)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    attr->last_index_transparent = (is_last != 0);
+  }
+}
+
+/*
+ * Installs (or clears, with NULL) the log callback. The flush callback is
+ * invoked first, before the previous log callback is replaced.
+ */
+LIQ_EXPORT void
+liq_set_log_callback (liq_attr * attr, liq_log_callback_function * callback,
+    void *user_info)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    liq_verbose_printf_flush (attr);
+    attr->log_callback = callback;
+    attr->log_callback_user_info = user_info;
+  }
+}
+
+/* Installs (or clears, with NULL) the log flush callback and its user info. */
+LIQ_EXPORT void
+liq_set_log_flush_callback (liq_attr * attr,
+    liq_log_flush_callback_function * callback, void *user_info)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    attr->log_flush_callback = callback;
+    attr->log_flush_callback_user_info = user_info;
+  }
+}
+
+/* Creates an attribute handle that uses the built-in aligned allocator. */
+LIQ_EXPORT liq_attr *
+liq_attr_create (void)
+{
+  liq_attr *const attr = liq_attr_create_with_allocator (NULL, NULL);
+
+  return attr;
+}
+
+/*
+ * Flushes pending log output and releases attr with its own free function.
+ * The handle is marked as freed first so later use can be detected.
+ */
+LIQ_EXPORT void
+liq_attr_destroy (liq_attr * attr)
+{
+  if (CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    liq_verbose_printf_flush (attr);
+
+    attr->magic_header = liq_freed_magic;
+    attr->free (attr);
+  }
+}
+
+/*
+ * Returns a copy of orig allocated with orig's allocator, or NULL when orig
+ * is invalid or the allocation fails.
+ */
+LIQ_EXPORT liq_attr *
+liq_attr_copy (liq_attr * orig)
+{
+  liq_attr *copy;
+
+  if (!CHECK_STRUCT_TYPE (orig, liq_attr)) {
+    return NULL;
+  }
+
+  copy = orig->malloc (sizeof (liq_attr));
+  if (copy) {
+    *copy = *orig;
+  }
+  return copy;
+}
+
+/*
+ * malloc() wrapper returning 16-byte aligned memory. The block is
+ * over-allocated by 16 bytes and the distance to the start of the real
+ * allocation is stored, obfuscated, in the byte just before the returned
+ * pointer. Must be released with liq_aligned_free().
+ */
+static void *
+liq_aligned_malloc (size_t size)
+{
+  unsigned char *raw = malloc (size + 16);
+  unsigned char *aligned;
+  uintptr_t shift;
+
+  if (raw == NULL) {
+    return NULL;
+  }
+
+  // shift is 1..16, so there is always room for the byte at aligned[-1]
+  shift = 16 - ((uintptr_t) raw & 15);
+  aligned = raw + shift;
+  assert (0 == (((uintptr_t) aligned) & 15));
+  aligned[-1] = shift ^ 0x59;   // remembered for liq_aligned_free()
+  return aligned;
+}
+
+/* Releases memory obtained from liq_aligned_malloc(). */
+static void
+liq_aligned_free (void *inptr)
+{
+  unsigned char *aligned = inptr;
+  const size_t shift = aligned[-1] ^ 0x59;
+
+  assert (shift > 0 && shift <= 16);
+  free (aligned - shift);
+}
+
+/*
+ * Creates an attribute handle with default settings (256 colors, speed 3).
+ * Either both allocator functions or none must be given; with none the
+ * built-in aligned allocator is used. Returns NULL on a mismatched pair,
+ * on allocation failure, or when SSE is required but not available.
+ */
+LIQ_EXPORT liq_attr *
+liq_attr_create_with_allocator (void *(*custom_malloc) (size_t),
+    void (*custom_free) (void *))
+{
+  liq_attr *attr;
+#if USE_SSE
+  if (!is_sse_available ()) {
+    return NULL;
+  }
+#endif
+  if (!custom_malloc != !custom_free) {
+    return NULL;                // either specify both or none
+  }
+  if (!custom_malloc) {
+    custom_malloc = liq_aligned_malloc;
+    custom_free = liq_aligned_free;
+  }
+
+  attr = custom_malloc (sizeof (liq_attr));
+  if (attr == NULL) {
+    return NULL;
+  }
+
+  *attr = (liq_attr) {
+    .magic_header = liq_attr_magic,
+    .malloc = custom_malloc,
+    .free = custom_free,
+    .max_colors = 256,
+    .min_opaque_val = 1,        // whether preserve opaque colors for IE (1.0=no, does not affect alpha)
+    .last_index_transparent = false,    // puts transparent color at last index. This is workaround for blu-ray subtitles.
+    .target_mse = 0,
+    .max_mse = MAX_DIFF,
+  };
+  liq_set_speed (attr, 3);
+  return attr;
+}
+
+/*
+ * Appends color, converted with the image gamma, to the list of fixed
+ * colors of img. At most 256 colors can be stored; beyond that
+ * LIQ_BUFFER_TOO_SMALL is returned.
+ */
+LIQ_EXPORT liq_error
+liq_image_add_fixed_color (liq_image * img, liq_color color)
+{
+  float gamma_lut[256];
+  rgba_pixel pix;
+
+  if (!CHECK_STRUCT_TYPE (img, liq_image)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (img->fixed_colors_count > 255) {
+    return LIQ_BUFFER_TOO_SMALL;
+  }
+
+  pix = (rgba_pixel) {
+    .r = color.r,
+    .g = color.g,
+    .b = color.b,
+    .a = color.a
+  };
+
+  to_f_set_gamma (gamma_lut, img->gamma);
+  img->fixed_colors[img->fixed_colors_count] = to_f (gamma_lut, pix);
+  img->fixed_colors_count++;
+  return LIQ_OK;
+}
+
+/*
+ * Allocates temp_f_row: one row of converted pixels per thread.
+ * Returns false when the allocation fails.
+ */
+static bool
+liq_image_use_low_memory (liq_image * img)
+{
+  const size_t row_bytes = sizeof (img->f_pixels[0]) * img->width;
+
+  img->temp_f_row = img->malloc (row_bytes * omp_get_max_threads ());
+  return img->temp_f_row != NULL;
+}
+
+/*
+ * Tells whether the converted pixels of img would exceed the memory budget:
+ * LIQ_HIGH_MEMORY_LIMIT, or an eighth of it when low_memory_hint is set.
+ */
+static bool
+liq_image_should_use_low_memory (liq_image * img, const bool low_memory_hint)
+{
+  const unsigned int pixel_count = img->width * img->height;
+  const size_t byte_budget =
+      low_memory_hint ? LIQ_HIGH_MEMORY_LIMIT / 8 : LIQ_HIGH_MEMORY_LIMIT;
+
+  // the budget is divided instead of multiplying the count: watch out for integer overflow
+  return pixel_count > byte_budget / sizeof (f_pixel);
+}
+
+/*
+ * Common constructor behind the public liq_image_create_*() functions.
+ *
+ * Exactly one pixel source is expected: rows (array of row pointers) or
+ * row_callback with its user info. gamma must be within 0..1, where 0
+ * selects the default of 0.45455. The image inherits allocator and
+ * min_opaque_val from attr.
+ *
+ * Returns the new image, or NULL on invalid arguments or allocation
+ * failure. On failure everything allocated here is released again; rows
+ * is never freed by this function.
+ */
+static liq_image *
+liq_image_create_internal (liq_attr * attr, rgba_pixel * rows[],
+    liq_image_get_rgba_row_callback * row_callback,
+    void *row_callback_user_info, int width, int height, double gamma)
+{
+  liq_image *img;
+  if (gamma < 0 || gamma > 1.0) {
+    liq_log_error (attr, "gamma must be >= 0 and <= 1 (try 1/gamma instead)");
+    return NULL;
+  }
+
+  if (!rows && !row_callback) {
+    liq_log_error (attr, "missing row data");
+    return NULL;
+  }
+
+  img = attr->malloc (sizeof (liq_image));
+  if (!img)
+    return NULL;
+  *img = (liq_image) {
+  .magic_header = liq_image_magic,.malloc = attr->malloc,.free =
+        attr->free,.width = width,.height = height,.gamma =
+        gamma ? gamma : 0.45455,.rows = rows,.row_callback =
+        row_callback,.row_callback_user_info =
+        row_callback_user_info,.min_opaque_val = attr->min_opaque_val,};
+
+  // a scratch row is needed when rows come from a callback or have to be
+  // modified (min_opaque_val < 1) before use
+  if (!rows || attr->min_opaque_val < 1.f) {
+    img->temp_row =
+        attr->malloc (sizeof (img->temp_row[0]) * width *
+        omp_get_max_threads ());
+    if (!img->temp_row) {
+      attr->free (img);         // do not leak the half-built image
+      return NULL;
+    }
+  }
+  // if image is huge or converted pixels are not likely to be reused then don't cache converted pixels
+  if (liq_image_should_use_low_memory (img, !img->temp_row
+          && !attr->use_contrast_maps && !attr->use_dither_map)) {
+    verbose_print (attr, "  conserving memory");
+    if (!liq_image_use_low_memory (img)) {
+      // custom free functions are not guaranteed to accept NULL
+      if (img->temp_row) {
+        attr->free (img->temp_row);
+      }
+      attr->free (img);
+      return NULL;
+    }
+  }
+
+  if (img->min_opaque_val < 1.f) {
+    verbose_print (attr,
+        "  Working around IE6 bug by making image less transparent...");
+  }
+
+  return img;
+}
+
+/*
+ * Transfers ownership of the rows array and/or the pixel bitmap to img.
+ * ownership_flags is a non-empty combination of LIQ_OWN_ROWS and
+ * LIQ_OWN_PIXELS and the image must be backed by rows. Rows that were
+ * allocated by the library itself cannot be given away again.
+ */
+LIQ_EXPORT liq_error
+liq_image_set_memory_ownership (liq_image * img, int ownership_flags)
+{
+  const int known_flags = LIQ_OWN_ROWS | LIQ_OWN_PIXELS;
+
+  if (!CHECK_STRUCT_TYPE (img, liq_image)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!img->rows || !ownership_flags || (ownership_flags & ~known_flags)) {
+    return LIQ_VALUE_OUT_OF_RANGE;
+  }
+
+  if (ownership_flags & LIQ_OWN_ROWS) {
+    if (img->free_rows_internal) {
+      return LIQ_VALUE_OUT_OF_RANGE;
+    }
+    img->free_rows = true;
+  }
+
+  if (ownership_flags & LIQ_OWN_PIXELS) {
+    img->free_pixels = true;
+    if (!img->pixels) {
+      // for simplicity of this API there's no explicit bitmap argument,
+      // so the row with the lowest address is assumed to be at the start of the bitmap
+      rgba_pixel *lowest = img->rows[0];
+      unsigned int row;
+
+      for (row = 1; row < img->height; row++) {
+        if (img->rows[row] < lowest) {
+          lowest = img->rows[row];
+        }
+      }
+      img->pixels = lowest;
+    }
+  }
+
+  return LIQ_OK;
+}
+
+/*
+ * Validates attr and the image dimensions: both must be positive and
+ * width*height must fit in an int. Problems are logged through attr.
+ */
+static bool
+check_image_size (const liq_attr * attr, const int width, const int height)
+{
+  const char *problem = NULL;
+
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return false;
+  }
+
+  if (width <= 0 || height <= 0) {
+    problem = "width and height must be > 0";
+  } else if (width > INT_MAX / height) {
+    problem = "image too large";
+  }
+
+  if (problem) {
+    liq_log_error (attr, problem);
+    return false;
+  }
+  return true;
+}
+
+/*
+ * Creates an image whose rows are produced on demand by row_callback.
+ * Returns NULL when attr or the dimensions are invalid or creation fails.
+ */
+LIQ_EXPORT liq_image *
+liq_image_create_custom (liq_attr * attr,
+    liq_image_get_rgba_row_callback * row_callback, void *user_info, int width,
+    int height, double gamma)
+{
+  if (check_image_size (attr, width, height)) {
+    return liq_image_create_internal (attr, NULL, row_callback, user_info,
+        width, height, gamma);
+  }
+  return NULL;
+}
+
+/*
+ * Creates an image from an array of height row pointers. Every slot of the
+ * array and every row pointer is probed first; NULL entries are rejected.
+ */
+LIQ_EXPORT liq_image *
+liq_image_create_rgba_rows (liq_attr * attr, void *rows[], int width,
+    int height, double gamma)
+{
+  int row;
+
+  if (!check_image_size (attr, width, height)) {
+    return NULL;
+  }
+
+  row = 0;
+  while (row < height) {
+    // the slot itself is probed before the row pointer stored in it is read
+    const bool slot_ok = CHECK_USER_POINTER (rows + row);
+
+    if (!slot_ok || !CHECK_USER_POINTER (rows[row])) {
+      liq_log_error (attr, "invalid row pointers");
+      return NULL;
+    }
+    row++;
+  }
+
+  return liq_image_create_internal (attr, (rgba_pixel **) rows, NULL, NULL,
+      width, height, gamma);
+}
+
+/*
+ * Creates an image from one contiguous bitmap of width*height RGBA pixels.
+ * A row pointer array is allocated internally and owned by the image; the
+ * bitmap itself stays owned by the caller.
+ *
+ * Returns NULL when attr, the dimensions or bitmap are invalid, when gamma
+ * is rejected, or when an allocation fails.
+ */
+LIQ_EXPORT liq_image *
+liq_image_create_rgba (liq_attr * attr, void *bitmap, int width, int height,
+    double gamma)
+{
+  rgba_pixel *pixels;
+  rgba_pixel **rows;
+  liq_image *image;
+
+  if (!check_image_size (attr, width, height)) {
+    return NULL;
+  }
+  if (!CHECK_USER_POINTER (bitmap)) {
+    liq_log_error (attr, "invalid bitmap pointer");
+    return NULL;
+  }
+
+  pixels = bitmap;
+  rows = attr->malloc (sizeof (rows[0]) * height);
+  if (!rows)
+    return NULL;
+
+  for (int i = 0; i < height; i++) {
+    rows[i] = pixels + width * i;
+  }
+
+  image =
+      liq_image_create_internal (attr, rows, NULL, NULL, width, height, gamma);
+  if (!image) {
+    // creation can fail (bad gamma, out of memory): release the row array
+    // instead of dereferencing a NULL image
+    attr->free (rows);
+    return NULL;
+  }
+
+  // rows was allocated with the library allocator and must be freed with it
+  image->free_rows = true;
+  image->free_rows_internal = true;
+  return image;
+}
+
+/*
+ * Thin out-of-line wrapper around the user's row callback: asks callback to
+ * fill temp_row with the pixels of the given row.
+ */
+NEVER_INLINE LIQ_EXPORT void
+liq_executing_user_callback (liq_image_get_rgba_row_callback * callback,
+    liq_color * temp_row, int row, int width, void *user_info);
+LIQ_EXPORT void
+liq_executing_user_callback (liq_image_get_rgba_row_callback * callback,
+    liq_color * temp_row, int row, int width, void *user_info)
+{
+  assert (callback != NULL);
+  assert (temp_row != NULL);
+
+  (*callback) (temp_row, row, width, user_info);
+}
+
+/*
+ * Rows can be used directly only when the image has a rows array and no
+ * alpha modification (min_opaque_val < 1) is required.
+ */
+inline static bool
+liq_image_can_use_rows (liq_image * img)
+{
+  if (img->min_opaque_val < 1.f) {
+    return false;
+  }
+  return img->rows != NULL;
+}
+
+/*
+ * Returns the RGBA pixels of one row. Either the caller's row is returned
+ * directly, or the row is copied / fetched through the callback into the
+ * per-thread slot of temp_row and alpha-adjusted there if required.
+ */
+static const rgba_pixel *
+liq_image_get_row_rgba (liq_image * img, unsigned int row)
+{
+  rgba_pixel *scratch;
+
+  if (liq_image_can_use_rows (img)) {
+    return img->rows[row];
+  }
+
+  assert (img->temp_row);
+  scratch = &img->temp_row[img->width * omp_get_thread_num ()];
+
+  if (!img->rows) {
+    liq_executing_user_callback (img->row_callback, (liq_color *) scratch, row,
+        img->width, img->row_callback_user_info);
+  } else {
+    memcpy (scratch, img->rows[row], img->width * sizeof (scratch[0]));
+  }
+
+  if (img->min_opaque_val < 1.f) {
+    modify_alpha (img, scratch);
+  }
+  return scratch;
+}
+
+/*
+ * Converts one image row to float pixels with the given gamma lookup table
+ * and stores the result in row_f_pixels (img->width entries).
+ */
+static void
+convert_row_to_f (liq_image * img, f_pixel * row_f_pixels,
+    const unsigned int row, const float gamma_lut[])
+{
+  const rgba_pixel *source;
+  unsigned int x;
+
+  assert (row_f_pixels);
+  assert (!USE_SSE || 0 == ((uintptr_t) row_f_pixels & 15));
+
+  source = liq_image_get_row_rgba (img, row);
+  for (x = 0; x < img->width; x++) {
+    row_f_pixels[x] = to_f (gamma_lut, source[x]);
+  }
+}
+
+/*
+ * Returns one row of gamma-converted float pixels.
+ *
+ * The first call converts the whole image into f_pixels, unless the image
+ * is too large or the allocation fails; then rows are converted one at a
+ * time into a per-thread scratch row. Returns NULL when no memory can be
+ * allocated at all.
+ */
+static const f_pixel *
+liq_image_get_row_f (liq_image * img, unsigned int row)
+{
+  float gamma_lut[256];
+  unsigned int y;
+
+  /* whole image already converted */
+  if (img->f_pixels) {
+    return img->f_pixels + img->width * row;
+  }
+
+  /* low-memory mode: convert just this row */
+  if (img->temp_f_row) {
+    f_pixel *const scratch =
+        img->temp_f_row + img->width * omp_get_thread_num ();
+
+    to_f_set_gamma (gamma_lut, img->gamma);
+    convert_row_to_f (img, scratch, row, gamma_lut);
+    return scratch;
+  }
+
+  assert (omp_get_thread_num () == 0);
+  if (!liq_image_should_use_low_memory (img, false)) {
+    img->f_pixels =
+        img->malloc (sizeof (img->f_pixels[0]) * img->width * img->height);
+  }
+  if (!img->f_pixels) {
+    /* too large or out of memory: switch to low-memory mode and retry */
+    if (!liq_image_use_low_memory (img)) {
+      return NULL;
+    }
+    return liq_image_get_row_f (img, row);
+  }
+
+  to_f_set_gamma (gamma_lut, img->gamma);
+  for (y = 0; y < img->height; y++) {
+    convert_row_to_f (img, &img->f_pixels[y * img->width], y, gamma_lut);
+  }
+  return img->f_pixels + img->width * row;
+}
+
+/* Returns the image width, or -1 for an invalid image handle. */
+LIQ_EXPORT int
+liq_image_get_width (const liq_image * input_image)
+{
+  if (CHECK_STRUCT_TYPE (input_image, liq_image)) {
+    return input_image->width;
+  }
+  return -1;
+}
+
+/* Returns the image height, or -1 for an invalid image handle. */
+LIQ_EXPORT int
+liq_image_get_height (const liq_image * input_image)
+{
+  if (CHECK_STRUCT_TYPE (input_image, liq_image)) {
+    return input_image->height;
+  }
+  return -1;
+}
+
+typedef void free_func (void *);
+
+/*
+ * Picks the function that releases the rows/pixels of img. User-supplied
+ * memory cannot have come from the built-in aligned allocator, so with the
+ * default allocator it is released with plain free(); memory allocated by
+ * the library itself, or any custom allocator, uses img->free.
+ */
+static free_func *
+get_default_free_func (liq_image * img)
+{
+  const bool user_memory_with_default_allocator =
+      !img->free_rows_internal && img->free == liq_aligned_free;
+
+  if (user_memory_with_default_allocator) {
+    return free;
+  }
+  return img->free;
+}
+
+/* Releases the pixel bitmap and the rows array, as far as the image owns them. */
+static void
+liq_image_free_rgba_source (liq_image * input_image)
+{
+  const bool release_pixels = input_image->free_pixels && input_image->pixels;
+
+  if (release_pixels) {
+    free_func *const release = get_default_free_func (input_image);
+
+    release (input_image->pixels);
+    input_image->pixels = NULL;
+  }
+
+  if (input_image->free_rows && input_image->rows) {
+    free_func *const release = get_default_free_func (input_image);
+
+    release (input_image->rows);
+    input_image->rows = NULL;
+  }
+}
+
+/*
+ * Destroys an image: releases owned source pixels/rows, all internal
+ * buffers and finally the handle itself, which is marked as freed first.
+ * Invalid or NULL handles are ignored.
+ */
+LIQ_EXPORT void
+liq_image_destroy (liq_image * input_image)
+{
+  if (!CHECK_STRUCT_TYPE (input_image, liq_image)) {
+    return;
+  }
+
+  liq_image_free_rgba_source (input_image);
+
+  {
+    /* internal buffers, released in this order when present */
+    void *const buffers[] = {
+      input_image->noise,
+      input_image->edges,
+      input_image->dither_map,
+      input_image->f_pixels,
+      input_image->temp_row,
+      input_image->temp_f_row,
+    };
+    unsigned int n;
+
+    for (n = 0; n < sizeof (buffers) / sizeof (buffers[0]); n++) {
+      if (buffers[n]) {
+        input_image->free (buffers[n]);
+      }
+    }
+  }
+
+  input_image->magic_header = liq_freed_magic;
+  input_image->free (input_image);
+}
+
+/*
+ * Quantizes img with the settings in attr: builds a histogram of the image
+ * and derives the result from it. Returns NULL for invalid handles or when
+ * the histogram cannot be built; the quantizer itself may return NULL too.
+ */
+LIQ_EXPORT liq_result *
+liq_quantize_image (liq_attr * attr, liq_image * img)
+{
+  histogram *hist;
+  liq_result *result = NULL;
+
+  if (!CHECK_STRUCT_TYPE (attr, liq_attr)) {
+    return NULL;
+  }
+  if (!CHECK_STRUCT_TYPE (img, liq_image)) {
+    liq_log_error (attr, "invalid image pointer");
+    return NULL;
+  }
+
+  hist = get_histogram (img, attr);
+  if (hist) {
+    result = pngquant_quantize (hist, attr, img);
+    pam_freeacolorhist (hist);
+  }
+  return result;
+}
+
+/*
+ * Sets the dithering level used for remapping.
+ *
+ * dither_level must be within 0..1 (NaN is rejected as well); otherwise
+ * LIQ_VALUE_OUT_OF_RANGE is returned and res is left untouched. On success
+ * any existing remapping is discarded, since it was made with the old
+ * level.
+ */
+LIQ_EXPORT liq_error
+liq_set_dithering_level (liq_result * res, float dither_level)
+{
+  if (!CHECK_STRUCT_TYPE (res, liq_result))
+    return LIQ_INVALID_POINTER;
+
+  // validate the requested value, not the one currently stored in res
+  if (!(dither_level >= 0 && dither_level <= 1.0f))
+    return LIQ_VALUE_OUT_OF_RANGE;
+
+  if (res->remapping) {
+    liq_remapping_result_destroy (res->remapping);
+    res->remapping = NULL;
+  }
+
+  res->dither_level = dither_level;
+  return LIQ_OK;
+}
+
+/*
+ * Creates the remapping state for result: settings are copied and the
+ * palette is duplicated. Returns NULL for an invalid result or when the
+ * allocation fails.
+ */
+static liq_remapping_result *
+liq_remapping_result_create (liq_result * result)
+{
+  liq_remapping_result *remap;
+
+  if (!CHECK_STRUCT_TYPE (result, liq_result)) {
+    return NULL;
+  }
+
+  remap = result->malloc (sizeof (liq_remapping_result));
+  if (remap == NULL) {
+    return NULL;
+  }
+
+  *remap = (liq_remapping_result) {
+    .magic_header = liq_remapping_result_magic,
+    .malloc = result->malloc,
+    .free = result->free,
+    .dither_level = result->dither_level,
+    .use_dither_map = result->use_dither_map,
+    .palette_error = result->palette_error,
+    .gamma = result->gamma,
+    .palette = pam_duplicate_colormap (result->palette),
+  };
+  return remap;
+}
+
+/* Returns the output gamma of result, or -1 for an invalid handle. */
+LIQ_EXPORT double
+liq_get_output_gamma (const liq_result * result)
+{
+  if (CHECK_STRUCT_TYPE (result, liq_result)) {
+    return result->gamma;
+  }
+  return -1;
+}
+
+/*
+ * Releases a remapping result together with its palette and pixel buffer.
+ * The handle is marked as freed before its memory is returned.
+ */
+static void
+liq_remapping_result_destroy (liq_remapping_result * result)
+{
+  if (!CHECK_STRUCT_TYPE (result, liq_remapping_result)) {
+    return;
+  }
+
+  if (result->palette != NULL) {
+    pam_freecolormap (result->palette);
+  }
+  if (result->pixels != NULL) {
+    result->free (result->pixels);
+  }
+
+  result->magic_header = liq_freed_magic;
+  result->free (result);
+}
+
+/*
+ * Destroys a quantization result including its remapping and palette.
+ * The exported integer palettes are zeroed before the memory is released.
+ */
+LIQ_EXPORT void
+liq_result_destroy (liq_result * res)
+{
+  liq_remapping_result *remap;
+
+  if (!CHECK_STRUCT_TYPE (res, liq_result)) {
+    return;
+  }
+
+  memset (&res->int_palette, 0, sizeof (res->int_palette));
+
+  remap = res->remapping;
+  if (remap != NULL) {
+    memset (&remap->int_palette, 0, sizeof (remap->int_palette));
+    liq_remapping_result_destroy (remap);
+  }
+
+  pam_freecolormap (res->palette);
+
+  res->magic_header = liq_freed_magic;
+  res->free (res);
+}
+
+/*
+ * Returns the quantization error scaled by 65536/6. The palette error of
+ * the result is preferred, then that of its remapping; when neither is
+ * known (negative) the raw negative value is returned. -1 is returned for
+ * an invalid handle.
+ */
+LIQ_EXPORT double
+liq_get_quantization_error (liq_result * result)
+{
+  double mse;
+
+  if (!CHECK_STRUCT_TYPE (result, liq_result)) {
+    return -1;
+  }
+
+  if (result->palette_error >= 0) {
+    mse = result->palette_error;
+  } else if (result->remapping && result->remapping->palette_error >= 0) {
+    mse = result->remapping->palette_error;
+  } else {
+    return result->palette_error;
+  }
+
+  return mse * 65536.0 / 6.0;
+}
+
+/*
+ * Returns the quantization error expressed as a 0..100 quality. The
+ * palette error of the result is preferred, then that of its remapping;
+ * when neither is known (negative) the raw value is returned converted to
+ * int. -1 is returned for an invalid handle.
+ */
+LIQ_EXPORT int
+liq_get_quantization_quality (liq_result * result)
+{
+  const liq_remapping_result *remap;
+
+  if (!CHECK_STRUCT_TYPE (result, liq_result)) {
+    return -1;
+  }
+
+  remap = result->remapping;
+  if (!(result->palette_error >= 0)) {
+    if (remap == NULL || !(remap->palette_error >= 0)) {
+      return result->palette_error;
+    }
+    return mse_to_quality (remap->palette_error);
+  }
+  return mse_to_quality (result->palette_error);
+}
+
+/*
+ * qsort() comparator that orders colormap items by descending popularity.
+ * Equal popularities compare as equal (0): qsort() requires a consistent
+ * ordering, which "a before b" and "b before a" at the same time is not.
+ */
+static int
+compare_popularity (const void *ch1, const void *ch2)
+{
+  const float v1 = ((const colormap_item *) ch1)->popularity;
+  const float v2 = ((const colormap_item *) ch2)->popularity;
+
+  /* -1 when v1 is more popular, 1 when less, 0 when equal */
+  return (v1 < v2) - (v1 > v2);
+}
+
+/* Sorts nelem palette entries, beginning at index start, by popularity. */
+static void
+sort_palette_qsort (colormap * map, int start, int nelem)
+{
+  colormap_item *const first = map->palette + start;
+
+  qsort (first, nelem, sizeof (*first), compare_popularity);
+}
+
+/* Exchanges palette entries a and b of map. Expands to a brace block and
+ * evaluates each of a and b twice, so pass side-effect free arguments. */
+#define SWAP_PALETTE(map, a,b) { \
+    const colormap_item tmp = (map)->palette[(a)]; \
+    (map)->palette[(a)] = (map)->palette[(b)]; \
+    (map)->palette[(b)] = tmp; }
+
+/*
+ * Reorders the palette for output.
+ *
+ * With last_index_transparent set and a fully transparent entry present,
+ * that entry is moved to the last index and the rest is sorted by
+ * popularity. Otherwise all non-opaque entries are moved to the front,
+ * both groups are sorted by popularity separately and, for palettes with
+ * more than 16 colors, three pairs of entries are exchanged.
+ */
+static void
+sort_palette (colormap * map, const liq_attr * options)
+{
+  unsigned int idx;
+  unsigned int transparent_count = 0;
+
+  /*
+   ** Step 3.5 [GRR]: remap the palette colors so that all entries with
+   ** the maximal alpha value (i.e., fully opaque) are at the end and can
+   ** therefore be omitted from the tRNS chunk.
+   */
+  if (options->last_index_transparent) {
+    for (idx = 0; idx < map->colors; idx++) {
+      if (!(map->palette[idx].acolor.a < 1.0 / 256.0)) {
+        continue;
+      }
+
+      /* first fully transparent entry found: park it at the last index */
+      SWAP_PALETTE (map, map->colors - 1, idx);
+
+      /* colors sorted by popularity make pngs slightly more compressible */
+      sort_palette_qsort (map, 0, map->colors - 1);
+      return;
+    }
+  }
+
+  /* move transparent colors to the beginning to shrink trns chunk */
+  idx = 0;
+  while (idx < map->colors) {
+    if (map->palette[idx].acolor.a < 255.0 / 256.0) {
+      const bool misplaced = (idx != transparent_count);
+
+      if (misplaced) {
+        // current transparent color is swapped with earlier opaque one
+        SWAP_PALETTE (map, transparent_count, idx);
+      }
+      transparent_count++;
+      if (misplaced) {
+        continue;               /* look at the entry swapped into idx again */
+      }
+    }
+    idx++;
+  }
+
+  liq_verbose_printf (options,
+      "  eliminated opaque tRNS-chunk entries...%d entr%s transparent",
+      transparent_count, (transparent_count == 1) ? "y" : "ies");
+
+  /* colors sorted by popularity make pngs slightly more compressible
+   * opaque and transparent are sorted separately
+   */
+  sort_palette_qsort (map, 0, transparent_count);
+  sort_palette_qsort (map, transparent_count,
+      map->colors - transparent_count);
+
+  if (map->colors > 16) {
+    // slightly improves compression
+    SWAP_PALETTE (map, 7, 1);
+    SWAP_PALETTE (map, 8, 2);
+    SWAP_PALETTE (map, 9, 3);
+  }
+}
+
+/*
+ * Posterizes an 8-bit channel value: the lowest bits bits are cleared and
+ * replaced with the topmost bits of the value.
+ */
+inline static unsigned int
+posterize_channel (unsigned int color, unsigned int bits)
+{
+  const unsigned int kept_high_bits = color & ~((1 << bits) - 1);
+  const unsigned int replicated_top_bits = color >> (8 - bits);
+
+  return kept_high_bits | replicated_top_bits;
+}
+
+/*
+ * Fills dest with the 8-bit version of the colors in map, posterized by
+ * posterize bits. The float colors in map are replaced by the rounded
+ * values as well. Fully transparent entries get the RGB bytes 'L','i','q'.
+ */
+static void
+set_rounded_palette (liq_palette * const dest, colormap * const map,
+    const double gamma, unsigned int posterize)
+{
+  float gamma_lut[256];
+  unsigned int idx;
+
+  to_f_set_gamma (gamma_lut, gamma);
+
+  dest->count = map->colors;
+  for (idx = 0; idx < map->colors; idx++) {
+    rgba_pixel px = to_rgb (gamma, map->palette[idx].acolor);
+
+    px.r = posterize_channel (px.r, posterize);
+    px.g = posterize_channel (px.g, posterize);
+    px.b = posterize_channel (px.b, posterize);
+    px.a = posterize_channel (px.a, posterize);
+
+    /* saves rounding error introduced by to_rgb, which makes remapping & dithering more accurate */
+    map->palette[idx].acolor = to_f (gamma_lut, px);
+
+    if (px.a == 0) {
+      px.r = 'L';
+      px.g = 'i';
+      px.b = 'q';
+    }
+
+    dest->entries[idx] = (liq_color) {
+      .r = px.r,
+      .g = px.g,
+      .b = px.b,
+      .a = px.a
+    };
+  }
+}
+
+/*
+ * Returns the 8-bit palette of result. The palette of an existing
+ * remapping wins when it has been filled in; otherwise the palette of the
+ * result itself is computed on first use. Returns NULL for an invalid
+ * handle.
+ */
+LIQ_EXPORT const liq_palette *
+liq_get_palette (liq_result * result)
+{
+  liq_palette *own_palette;
+
+  if (!CHECK_STRUCT_TYPE (result, liq_result)) {
+    return NULL;
+  }
+
+  if (result->remapping && result->remapping->int_palette.count) {
+    return &result->remapping->int_palette;
+  }
+
+  own_palette = &result->int_palette;
+  if (own_palette->count == 0) {
+    set_rounded_palette (own_palette, result->palette, result->gamma,
+        result->min_posterization_output);
+  }
+  return own_palette;
+}
+
+/* upper bound for the number of per-thread viter_state slots allocated in remap_to_palette() */
+#define MAX_THREADS 8
+
+/*
+ * Remaps every pixel of input_image to its nearest palette entry.
+ *
+ * The palette index of each pixel is stored in output_pixels[row][col].
+ * fast is passed on to nearest_init(). The colors of map are updated via
+ * the viter_*() helpers while remapping.
+ *
+ * Returns the sum of the per-pixel differences divided by the number of
+ * pixels, or -1 when the image rows cannot be converted.
+ */
+static float
+remap_to_palette (liq_image * const input_image,
+    unsigned char *const *const output_pixels, colormap * const map,
+    const bool fast)
+{
+  const int rows = input_image->height;
+  const unsigned int cols = input_image->width;
+  const float min_opaque_val = input_image->min_opaque_val;
+  double remapping_error = 0;
+
+  if (!liq_image_get_row_f (input_image, 0)) {  // trigger lazy conversion
+    return -1;
+  }
+
+  {
+    struct nearest_map *const n = nearest_init (map, fast);
+
+    const unsigned int max_threads = MIN (MAX_THREADS, omp_get_max_threads ());
+    // stack-allocated and sized for MAX_THREADS, although only max_threads slots are handed to viter_init()
+    viter_state *average_color =
+        g_alloca (sizeof (viter_state) * (VITER_CACHE_LINE_GAP +
+            map->colors) * MAX_THREADS);
+    unsigned int row, col;
+
+    viter_init (map, max_threads, average_color);
+
+#pragma omp parallel for if (rows*cols > 3000) \
+            schedule(static) default(none) shared(average_color) reduction(+:remapping_error)
+    for (row = 0; row < rows; ++row) {
+      const f_pixel *const row_pixels = liq_image_get_row_f (input_image, row);
+      // the match of the previous pixel is passed to the search for the next one
+      unsigned int last_match = 0;
+      for (col = 0; col < cols; ++col) {
+        f_pixel px = row_pixels[col];
+        float diff;
+
+        output_pixels[row][col] = last_match =
+            nearest_search (n, px, last_match, min_opaque_val, &diff);
+
+        remapping_error += diff;
+        // NOTE(review): presumably accumulates px into the per-thread average of the matched entry - confirm in viter.c
+        viter_update_color (px, 1.0, map, last_match, omp_get_thread_num (),
+            average_color);
+      }
+    }
+
+    viter_finalize (map, max_threads, average_color);
+
+    nearest_free (n);
+  }
+
+  return remapping_error / (input_image->width * input_image->height);
+}
+
+/* Return `px` adjusted by the diffused error `thiserr` scaled by
+ * `dither_level`.
+ *
+ * The r/g/b adjustment is scaled by a common `ratio` so that no colour
+ * channel goes below 0 or above 1.03; alpha is adjusted separately and
+ * clamped to [0, 1]. When the squared magnitude of the scaled error exceeds
+ * `max_dither_error` the ratio is reduced further, and when it is below
+ * 2/256/256 the input pixel is returned unchanged. */
+inline static f_pixel
+get_dithered_pixel (const float dither_level, const float max_dither_error,
+    const f_pixel thiserr, const f_pixel px)
+{
+  /* Use Floyd-Steinberg errors to adjust actual color. */
+  const float sr = thiserr.r * dither_level,
+      sg = thiserr.g * dither_level,
+      sb = thiserr.b * dither_level, sa = thiserr.a * dither_level;
+  float a;
+  float ratio = 1.0;
+  float dither_error;
+
+  // allowing some overflow prevents undithered bands caused by clamping of all channels
+  // (each branch lowers ratio just enough to bring that channel back to the limit)
+  if (px.r + sr > 1.03)
+    ratio = MIN (ratio, (1.03 - px.r) / sr);
+  else if (px.r + sr < 0)
+    ratio = MIN (ratio, px.r / -sr);
+  if (px.g + sg > 1.03)
+    ratio = MIN (ratio, (1.03 - px.g) / sg);
+  else if (px.g + sg < 0)
+    ratio = MIN (ratio, px.g / -sg);
+  if (px.b + sb > 1.03)
+    ratio = MIN (ratio, (1.03 - px.b) / sb);
+  else if (px.b + sb < 0)
+    ratio = MIN (ratio, px.b / -sb);
+
+  // alpha is not scaled by ratio; it is simply clamped to the valid range
+  a = px.a + sa;
+  if (a > 1.0) {
+    a = 1.0;
+  } else if (a < 0) {
+    a = 0;
+  }
+  // If dithering error is crazy high, don't propagate it that much
+  // This prevents crazy green pixels popping out of the blue (or red or black! ;)
+  dither_error = sr * sr + sg * sg + sb * sb + sa * sa;
+  if (dither_error > max_dither_error) {
+    ratio *= 0.8;
+  } else if (dither_error < 2.f / 256.f / 256.f) {
+    // don't dither areas that don't have noticeable error — makes file smaller
+    return px;
+  }
+
+  return (f_pixel) {
+  .r = px.r + sr * ratio,.g = px.g + sg * ratio,.b = px.b + sb * ratio,.a = a,};
+}
+
+/**
+ * Remaps the image to the palette with Floyd-Steinberg error diffusion,
+ * scanning rows in alternating (zig-zag) direction.
+ *
+ * When use_dither_map is true the image's dither_map (or, failing that, its
+ * edges map) scales the dithering strength per pixel, so that dithering is
+ * applied mostly to flat areas. Dithering on edges creates jagged lines, and
+ * noisy areas are "naturally" dithered.
+ *
+ * If output_image_is_remapped is true, the palette index already stored in
+ * output_pixels is used as the starting guess for the nearest-colour search;
+ * otherwise the previous pixel's match is used.
+ *
+ * The function reseeds rand() with a fixed seed, and returns without writing
+ * any pixel if the error buffers cannot be allocated.
+ */
+static void
+remap_to_palette_floyd (liq_image * input_image,
+    unsigned char *const output_pixels[], const colormap * map,
+    const float max_dither_error, const bool use_dither_map,
+    const bool output_image_is_remapped, float base_dithering_level)
+{
+  const unsigned int rows = input_image->height, cols = input_image->width;
+  const unsigned char *dither_map =
+      use_dither_map ? (input_image->
+      dither_map ? input_image->dither_map : input_image->edges) : NULL;
+  const float min_opaque_val = input_image->min_opaque_val;
+
+  const colormap_item *acolormap = map->palette;
+
+  struct nearest_map *const n = nearest_init (map, false);
+
+  /* Initialize Floyd-Steinberg error vectors. */
+  f_pixel *restrict thiserr, *restrict nexterr;
+  thiserr = input_image->malloc ((cols + 2) * sizeof (*thiserr) * 2);   // +2 saves from checking out of bounds access
+  srand (12345);                /* deterministic dithering is better for comparing results */
+  if (!thiserr) {
+    nearest_free (n);           // don't leak the search structure on allocation failure
+    return;
+  }
+  /* both error rows live in the single allocation above */
+  nexterr = thiserr + (cols + 2);
+
+  /* seed the first row with a tiny amount of noise */
+  for (unsigned int col = 0; col < cols + 2; ++col) {
+    const double rand_max = RAND_MAX;
+    thiserr[col].r = ((double) rand () - rand_max / 2.0) / rand_max / 255.0;
+    thiserr[col].g = ((double) rand () - rand_max / 2.0) / rand_max / 255.0;
+    thiserr[col].b = ((double) rand () - rand_max / 2.0) / rand_max / 255.0;
+    thiserr[col].a = ((double) rand () - rand_max / 2.0) / rand_max / 255.0;
+  }
+
+  // response to this value is non-linear and without it any value < 0.8 would give almost no dithering
+  base_dithering_level =
+      1.0 - (1.0 - base_dithering_level) * (1.0 - base_dithering_level) * (1.0 -
+      base_dithering_level);
+
+  if (dither_map) {
+    base_dithering_level *= 1.0 / 255.0;        // convert byte to float
+  }
+  base_dithering_level *= 15.0 / 16.0;  // prevent small errors from accumulating
+
+  {
+    bool fs_direction = true;
+    unsigned int last_match = 0;
+    for (unsigned int row = 0; row < rows; ++row) {
+      unsigned int col = (fs_direction) ? 0 : (cols - 1);
+      const f_pixel *const row_pixels = liq_image_get_row_f (input_image, row);
+
+      memset (nexterr, 0, (cols + 2) * sizeof (*nexterr));
+
+      do {
+        float dither_level = base_dithering_level;
+        f_pixel spx, xp, err;
+        unsigned int guessed_match;
+
+        if (dither_map) {
+          dither_level *= dither_map[row * cols + col];
+        }
+
+        /* error slots are offset by one so that col - 1 never underflows */
+        spx =
+            get_dithered_pixel (dither_level, max_dither_error,
+            thiserr[col + 1], row_pixels[col]);
+
+        guessed_match =
+            output_image_is_remapped ? output_pixels[row][col] : last_match;
+        output_pixels[row][col] = last_match =
+            nearest_search (n, spx, guessed_match, min_opaque_val, NULL);
+
+        /* quantisation error: each channel against the same channel of the
+         * chosen palette entry */
+        xp = acolormap[last_match].acolor;
+        err.r = spx.r - xp.r;
+        err.g = spx.g - xp.g;
+        err.b = spx.b - xp.b;
+        err.a = spx.a - xp.a;
+
+        // If dithering error is crazy high, don't propagate it that much
+        // This prevents crazy green pixels popping out of the blue (or red or black! ;)
+        if (err.r * err.r + err.g * err.g + err.b * err.b + err.a * err.a >
+            max_dither_error) {
+          dither_level *= 0.75;
+        }
+
+        {
+          /* colour error is weighted by the opacity of the matched entry */
+          const float colorimp =
+              (3.0f + acolormap[last_match].acolor.a) / 4.0f * dither_level;
+          err.r *= colorimp;
+          err.g *= colorimp;
+          err.b *= colorimp;
+          err.a *= dither_level;
+        }
+
+        /* Propagate Floyd-Steinberg error terms. */
+        if (fs_direction) {
+          thiserr[col + 2].a += err.a * (7.f / 16.f);
+          thiserr[col + 2].r += err.r * (7.f / 16.f);
+          thiserr[col + 2].g += err.g * (7.f / 16.f);
+          thiserr[col + 2].b += err.b * (7.f / 16.f);
+
+          /* first write to this slot in the freshly zeroed row, hence '=' */
+          nexterr[col + 2].a = err.a * (1.f / 16.f);
+          nexterr[col + 2].r = err.r * (1.f / 16.f);
+          nexterr[col + 2].g = err.g * (1.f / 16.f);
+          nexterr[col + 2].b = err.b * (1.f / 16.f);
+
+          nexterr[col + 1].a += err.a * (5.f / 16.f);
+          nexterr[col + 1].r += err.r * (5.f / 16.f);
+          nexterr[col + 1].g += err.g * (5.f / 16.f);
+          nexterr[col + 1].b += err.b * (5.f / 16.f);
+
+          nexterr[col].a += err.a * (3.f / 16.f);
+          nexterr[col].r += err.r * (3.f / 16.f);
+          nexterr[col].g += err.g * (3.f / 16.f);
+          nexterr[col].b += err.b * (3.f / 16.f);
+
+        } else {
+          thiserr[col].a += err.a * (7.f / 16.f);
+          thiserr[col].r += err.r * (7.f / 16.f);
+          thiserr[col].g += err.g * (7.f / 16.f);
+          thiserr[col].b += err.b * (7.f / 16.f);
+
+          /* first write to this slot in the freshly zeroed row, hence '=' */
+          nexterr[col].a = err.a * (1.f / 16.f);
+          nexterr[col].r = err.r * (1.f / 16.f);
+          nexterr[col].g = err.g * (1.f / 16.f);
+          nexterr[col].b = err.b * (1.f / 16.f);
+
+          nexterr[col + 1].a += err.a * (5.f / 16.f);
+          nexterr[col + 1].r += err.r * (5.f / 16.f);
+          nexterr[col + 1].g += err.g * (5.f / 16.f);
+          nexterr[col + 1].b += err.b * (5.f / 16.f);
+
+          nexterr[col + 2].a += err.a * (3.f / 16.f);
+          nexterr[col + 2].r += err.r * (3.f / 16.f);
+          nexterr[col + 2].g += err.g * (3.f / 16.f);
+          nexterr[col + 2].b += err.b * (3.f / 16.f);
+        }
+
+        // remapping is done in zig-zag
+        if (fs_direction) {
+          ++col;
+          if (col >= cols)
+            break;
+        } else {
+          if (col <= 0)
+            break;
+          --col;
+        }
+      } while (1);
+
+      {
+        f_pixel *const temperr = thiserr;
+        thiserr = nexterr;
+        nexterr = temperr;
+      }
+
+      fs_direction = !fs_direction;
+    }
+  }
+
+  input_image->free (MIN (thiserr, nexterr));   // MIN because pointers were swapped
+  nearest_free (n);
+}
+
+/* Drop from `hist` every colour that lies within a small distance of one of
+ * the image's fixed colours. Fixed colors are always included in the palette,
+ * so it would be wasteful to duplicate them in the palette built from the
+ * histogram. The order of the remaining histogram entries is not preserved. */
+static void
+remove_fixed_colors_from_histogram (histogram * hist,
+    const liq_image * input_image, const float target_mse)
+{
+  const float max_difference = MAX (target_mse / 2.0, 2.0 / 256.0 / 256.0);
+  int j = 0;
+
+  if (!input_image->fixed_colors_count) {
+    return;
+  }
+
+  while (j < hist->size) {
+    bool removed = false;
+    unsigned int i;
+
+    for (i = 0; i < input_image->fixed_colors_count; i++) {
+      if (colordifference (hist->achv[j].acolor,
+              input_image->fixed_colors[i]) < max_difference) {
+        /* overwrite with the last entry and shrink the histogram by one */
+        hist->achv[j] = hist->achv[--hist->size];
+        removed = true;
+        break;
+      }
+    }
+
+    /* after a removal slot j holds a different colour, so re-check it */
+    if (!removed) {
+      j++;
+    }
+  }
+}
+
+/* histogram contains information how many times each color is present in the image, weighted by importance_map
+ *
+ * Returns NULL if the colour hash table cannot be allocated; otherwise
+ * returns whatever pam_acolorhashtoacolorhist() produces, with colours close
+ * to the image's fixed colours removed.
+ *
+ * Side effects on input_image: contrast maps may be generated, the noise map
+ * is freed, and the RGBA source is released when free_pixels and f_pixels
+ * are both set. */
+static histogram *
+get_histogram (liq_image * input_image, const liq_attr * options)
+{
+  unsigned int ignorebits =
+      MAX (options->min_posterization_output, options->min_posterization_input);
+  const unsigned int cols = input_image->width, rows = input_image->height;
+
+  if (!input_image->noise && options->use_contrast_maps) {
+    contrast_maps (input_image);
+  }
+
+  /*
+   ** Step 2: attempt to make a histogram of the colors, unclustered.
+   ** If at first we don't succeed, increase ignorebits to increase color
+   ** coherence and try again.
+   */
+
+  {
+    unsigned int maxcolors = options->max_histogram_entries;
+
+    struct acolorhash_table *acht;
+    const bool all_rows_at_once = liq_image_can_use_rows (input_image);
+    histogram *hist;
+
+    /* each pass builds a fresh hash table; acht is reset to NULL to request a retry */
+    do {
+      acht =
+          pam_allocacolorhash (maxcolors, rows * cols, ignorebits,
+          options->malloc, options->free);
+      if (!acht)
+        return NULL;
+
+      // histogram uses noise contrast map for importance. Color accuracy in noisy areas is not very important.
+      // noise map does not include edges to avoid ruining anti-aliasing
+      for (unsigned int row = 0; row < rows; row++) {
+        bool added_ok;
+        if (all_rows_at_once) {
+          /* the whole image is hashed in a single call, so the row loop ends after it */
+          added_ok =
+              pam_computeacolorhash (acht,
+              (const rgba_pixel * const *) input_image->rows, cols, rows,
+              input_image->noise);
+          if (added_ok)
+            break;
+        } else {
+          /* hash one row at a time, with the matching slice of the noise map */
+          const rgba_pixel *rows_p[1] =
+              { liq_image_get_row_rgba (input_image, row) };
+          added_ok =
+              pam_computeacolorhash (acht, rows_p, cols, 1,
+              input_image->noise ? &input_image->noise[row * cols] : NULL);
+        }
+        if (!added_ok) {
+          /* discard the table and start over with coarser colours */
+          ignorebits++;
+          liq_verbose_printf (options,
+              "  too many colors! Scaling colors to improve clustering... %d",
+              ignorebits);
+          pam_freeacolorhash (acht);
+          acht = NULL;
+          break;
+        }
+      }
+    } while (!acht);
+
+    /* the noise map is not used again in this function after hashing */
+    if (input_image->noise) {
+      input_image->free (input_image->noise);
+      input_image->noise = NULL;
+    }
+
+    if (input_image->free_pixels && input_image->f_pixels) {
+      liq_image_free_rgba_source (input_image); // now can free the RGBA source if copy has been made in f_pixels
+    }
+
+    hist =
+        pam_acolorhashtoacolorhist (acht, input_image->gamma, options->malloc,
+        options->free);
+    pam_freeacolorhash (acht);
+    if (hist) {
+      liq_verbose_printf (options, "  made histogram...%d colors found",
+          hist->size);
+      remove_fixed_colors_from_histogram (hist, input_image,
+          options->target_mse);
+    }
+
+    return hist;
+  }
+}
+
+/* Raise the alpha of almost-opaque pixels in one row, in place.
+ *
+ * IE6 makes colors with even slightest transparency completely transparent,
+ * thus to improve the situation in IE, colors that are only slightly
+ * transparent are made completely opaque. To avoid a visible step caused by
+ * the forced opaqueness, the alpha of almost-opaque colors is raised
+ * linearly. */
+static void
+modify_alpha (liq_image * input_image, rgba_pixel * const row_pixels)
+{
+  const float min_opaque_val = input_image->min_opaque_val;
+  const float almost_opaque_val = min_opaque_val * 169.f / 256.f;
+  const unsigned int almost_opaque_val_int =
+      (min_opaque_val * 169.f / 256.f) * 255.f;
+  unsigned int x;
+
+  for (x = 0; x < input_image->width; x++) {
+    rgba_pixel *const px = &row_pixels[x];
+    float al;
+
+    /* pixels below the threshold are left untouched */
+    if (px->a < almost_opaque_val_int) {
+      continue;
+    }
+
+    al = px->a / 255.f;
+    al = almost_opaque_val + (al - almost_opaque_val) * (1.f -
+        almost_opaque_val) / (min_opaque_val - almost_opaque_val);
+    al *= 256.f;
+
+    if (al >= 255.f) {
+      px->a = 255;
+    } else {
+      px->a = al;
+    }
+  }
+}
+
+/**
+ Builds two maps and stores them on the image:
+    noise - approximation of areas with high-frequency noise, except straight edges. 1=flat, 0=noisy.
+    edges - noise map including all edges
+
+ Both are stored as one byte per pixel. Nothing is built (and the image is
+ left untouched) when the image is smaller than 4x4, when the three working
+ buffers would exceed LIQ_HIGH_MEMORY_LIMIT, or when any buffer allocation fails.
+ */
+static void
+contrast_maps (liq_image * image)
+{
+  const int cols = image->width, rows = image->height;
+  unsigned char *restrict noise, *restrict edges, *restrict tmp;
+  const f_pixel *curr_row, *prev_row, *next_row;
+  int i, j;
+
+  if (cols < 4 || rows < 4 || (3 * cols * rows) > LIQ_HIGH_MEMORY_LIMIT) {
+    return;
+  }
+
+  noise = image->malloc (cols * rows);
+  edges = image->malloc (cols * rows);
+  tmp = image->malloc (cols * rows);
+
+  if (!noise || !edges || !tmp) {
+    /* release whichever buffers were obtained so a partial failure doesn't leak */
+    if (noise)
+      image->free (noise);
+    if (edges)
+      image->free (edges);
+    if (tmp)
+      image->free (tmp);
+    return;
+  }
+
+  curr_row = prev_row = next_row = liq_image_get_row_f (image, 0);
+
+  for (j = 0; j < rows; j++) {
+    f_pixel prev, curr, next;
+
+    /* slide the three-row window; the last row is repeated at the bottom edge */
+    prev_row = curr_row;
+    curr_row = next_row;
+    next_row = liq_image_get_row_f (image, MIN (rows - 1, j + 1));
+
+    curr = curr_row[0];
+    next = curr;
+    for (i = 0; i < cols; i++) {
+      /* same sliding window horizontally, clamped at the right edge */
+      prev = curr;
+      curr = next;
+      next = curr_row[MIN (cols - 1, i + 1)];
+
+      // contrast is difference between pixels neighbouring horizontally and vertically
+      {
+        const float a = fabsf (prev.a + next.a - curr.a * 2.f),
+            r = fabsf (prev.r + next.r - curr.r * 2.f),
+            g = fabsf (prev.g + next.g - curr.g * 2.f),
+            b = fabsf (prev.b + next.b - curr.b * 2.f);
+
+        const f_pixel prevl = prev_row[i];
+        const f_pixel nextl = next_row[i];
+
+        const float a1 = fabsf (prevl.a + nextl.a - curr.a * 2.f),
+            r1 = fabsf (prevl.r + nextl.r - curr.r * 2.f),
+            g1 = fabsf (prevl.g + nextl.g - curr.g * 2.f),
+            b1 = fabsf (prevl.b + nextl.b - curr.b * 2.f);
+
+        const float horiz = MAX (MAX (a, r), MAX (g, b));
+        const float vert = MAX (MAX (a1, r1), MAX (g1, b1));
+        const float edge = MAX (horiz, vert);
+        float z = edge - fabsf (horiz - vert) * .5f;
+        z = 1.f - MAX (z, MIN (horiz, vert));
+        z *= z;                 // noise is amplified
+        z *= z;
+
+        /* scale to a byte, saturating at 255 */
+        z *= 256.f;
+        noise[j * cols + i] = z < 256 ? z : 255;
+        z = (1.f - edge) * 256.f;
+        edges[j * cols + i] = z < 256 ? z : 255;
+      }
+    }
+  }
+
+  // noise areas are shrunk and then expanded to remove thin edges from the map
+  liq_max3 (noise, tmp, cols, rows);
+  liq_max3 (tmp, noise, cols, rows);
+
+  liq_blur (noise, tmp, noise, cols, rows, 3);
+
+  liq_max3 (noise, tmp, cols, rows);
+
+  liq_min3 (tmp, noise, cols, rows);
+  liq_min3 (noise, tmp, cols, rows);
+  liq_min3 (tmp, noise, cols, rows);
+
+  liq_min3 (edges, tmp, cols, rows);
+  liq_max3 (tmp, edges, cols, rows);
+  /* the edges map never exceeds the noise map */
+  for (i = 0; i < cols * rows; i++)
+    edges[i] = MIN (noise[i], edges[i]);
+
+  image->free (tmp);
+
+  image->noise = noise;
+  image->edges = edges;
+}
+
+/**
+ * Turns the image's edges map into its dither map, using an already remapped
+ * image to find neighbouring pixels that share a palette entry.
+ *
+ * For efficiency/simplicity each row is split into horizontal runs of equal
+ * palette index, and only the pixels directly above/below the run are
+ * peeked at. Full 2d algorithm doesn't improve it significantly.
+ * Correct flood fill doesn't have visually good properties.
+ *
+ * The edge value of every pixel in a run is scaled by
+ * (1 - 2.5 / neighbor_count), in place. On return the buffer is owned by
+ * input_image->dither_map and input_image->edges is NULL.
+ */
+static void
+update_dither_map (unsigned char *const *const row_pointers,
+    liq_image * input_image)
+{
+  const unsigned int width = input_image->width;
+  const unsigned int height = input_image->height;
+  unsigned char *const edges = input_image->edges;
+  unsigned int row;
+
+  for (row = 0; row < height; row++) {
+    unsigned char run_value = row_pointers[row][0];
+    unsigned int run_start = 0;
+    unsigned int col;
+
+    for (col = 1; col < width; col++) {
+      const unsigned char px = row_pointers[row][col];
+      float neighbor_count;
+      unsigned int i;
+
+      /* the run continues, unless the end of the row forces a flush */
+      if (px == run_value && col != width - 1) {
+        continue;
+      }
+
+      neighbor_count = 2.5f + col - run_start;
+
+      /* count matching pixels directly above and below the run */
+      for (i = run_start; i < col; i++) {
+        if (row > 0 && row_pointers[row - 1][i] == run_value) {
+          neighbor_count += 1.f;
+        }
+        if (row < height - 1 && row_pointers[row + 1][i] == run_value) {
+          neighbor_count += 1.f;
+        }
+      }
+
+      /* rescale the edge values of the run (up to and including col) */
+      for (; run_start <= col; run_start++) {
+        float e = edges[row * width + run_start] / 255.f;
+        e *= 1.f - 2.5f / neighbor_count;
+        edges[row * width + run_start] = e * 255.f;
+      }
+
+      run_value = px;
+    }
+  }
+
+  input_image->dither_map = input_image->edges;
+  input_image->edges = NULL;
+}
+
+/* Build a new colormap holding the leading entries of `palette` followed by
+ * the fixed colours, limited to `max_colors` entries in total.
+ *
+ * When there are no fixed colours `palette` is returned unchanged.
+ * Otherwise `palette` (which may be NULL) is freed and the new colormap is
+ * returned; fixed colours take priority over palette entries when space is
+ * short. */
+static colormap *
+add_fixed_colors_to_palette (colormap * palette, const int max_colors,
+    const f_pixel fixed_colors[], const int fixed_colors_count,
+    void *(*malloc) (size_t), void (*free) (void *))
+{
+  colormap *merged;
+  unsigned int out = 0;
+  int k;
+
+  if (fixed_colors_count == 0) {
+    return palette;
+  }
+
+  merged =
+      pam_colormap (MIN (max_colors,
+          (palette ? palette->colors : 0) + fixed_colors_count), malloc, free);
+
+  /* copy as many existing entries as still fit beside the fixed colours */
+  if (palette && fixed_colors_count < max_colors) {
+    const unsigned int keep =
+        MIN (palette->colors, max_colors - fixed_colors_count);
+
+    while (out < keep) {
+      merged->palette[out] = palette->palette[out];
+      out++;
+    }
+  }
+
+  /* append the fixed colours, flagged as such */
+  for (k = 0; k < MIN (max_colors, fixed_colors_count); k++) {
+    const colormap_item fixed_item = {
+      .acolor = fixed_colors[k],
+      .fixed = true,
+    };
+
+    merged->palette[out] = fixed_item;
+    out++;
+  }
+
+  if (palette) {
+    pam_freecolormap (palette);
+  }
+
+  return merged;
+}
+
+/* Callback passed to viter_do_iteration: replaces the item's adjusted weight
+ * with the sum of its perceptual and adjusted weights multiplied by
+ * sqrt(1 + diff). */
+static void
+adjust_histogram_callback (hist_item * item, float diff)
+{
+  const float boost = sqrtf (1.f + diff);
+
+  item->adjusted_weight =
+      (item->perceptual_weight + item->adjusted_weight) * boost;
+}
+
+/**
+ Repeats mediancut with different histogram weights to find palette with minimum error.
+
+ feedback_loop_trials controls how long the search will take. When it is <= 0
+ the first palette is returned without any iteration.
+
+ Returns NULL when no palette could be built. *palette_error_p is written
+ only when the feedback loop runs to completion; on the early returns it is
+ left untouched.
+ */
+static colormap *
+find_best_palette (histogram * hist, const liq_attr * options,
+    const double max_mse, const f_pixel fixed_colors[],
+    const unsigned int fixed_colors_count, double *palette_error_p)
+{
+  unsigned int max_colors = options->max_colors;
+
+  // if output is posterized it doesn't make sense to aim for perfect colors, so increase target_mse
+  // at this point actual gamma is not set, so very conservative posterization estimate is used
+  const double target_mse = MIN (max_mse, MAX (options->target_mse,
+          pow ((1 << options->min_posterization_output) / 1024.0, 2)));
+  int feedback_loop_trials = options->feedback_loop_trials;
+  colormap *acolormap = NULL;   // best palette found so far
+  double least_error = MAX_DIFF;
+  double target_mse_overshoot = feedback_loop_trials > 0 ? 1.05 : 1.0;
+  // one percent of the initial trial budget, used only for progress reporting
+  const double percent =
+      (double) (feedback_loop_trials > 0 ? feedback_loop_trials : 1) / 100.0;
+
+  do {
+    colormap *newmap;
+    double total_error;
+
+    if (hist->size && fixed_colors_count < max_colors) {
+      newmap =
+          mediancut (hist, options->min_opaque_val,
+          max_colors - fixed_colors_count, target_mse * target_mse_overshoot,
+          MAX (MAX (90.0 / 65536.0, target_mse), least_error) * 1.2,
+          options->malloc, options->free);
+    } else {
+      // nothing to quantize: the palette consists of the fixed colors only
+      feedback_loop_trials = 0;
+      newmap = NULL;
+    }
+    newmap =
+        add_fixed_colors_to_palette (newmap, max_colors, fixed_colors,
+        fixed_colors_count, options->malloc, options->free);
+    if (!newmap) {
+      return NULL;
+    }
+
+    if (feedback_loop_trials <= 0) {
+      return newmap;
+    }
+    // after palette has been created, total error (MSE) is calculated to keep the best palette
+    // at the same time Voronoi iteration is done to improve the palette
+    // and histogram weights are adjusted based on remapping error to give more weight to poorly matched colors
+
+    {
+      const bool first_run_of_target_mse = !acolormap && target_mse > 0;
+      total_error =
+          viter_do_iteration (hist, newmap, options->min_opaque_val,
+          first_run_of_target_mse ? NULL : adjust_histogram_callback, !acolormap
+          || options->fast_palette);
+    }
+
+    // goal is to increase quality or to reduce number of colors used if quality is good enough
+    if (!acolormap || total_error < least_error || (total_error <= target_mse
+            && newmap->colors < max_colors)) {
+      if (acolormap)
+        pam_freecolormap (acolormap);
+      acolormap = newmap;
+
+      if (total_error < target_mse && total_error > 0) {
+        // voronoi iteration improves quality above what mediancut aims for
+        // this compensates for it, making mediancut aim for worse
+        target_mse_overshoot =
+            MIN (target_mse_overshoot * 1.25, target_mse / total_error);
+      }
+
+      least_error = total_error;
+
+      // if number of colors could be reduced, try to keep it that way
+      // but allow extra color as a bit of wiggle room in case quality can be improved too
+      max_colors = MIN (newmap->colors + 1, max_colors);
+
+      feedback_loop_trials -= 1;        // asymptotic improvement could make it go on forever
+    } else {
+      // rejected palette: pull the adjusted weights halfway back towards the perceptual weights
+      for (unsigned int j = 0; j < hist->size; j++) {
+        hist->achv[j].adjusted_weight =
+            (hist->achv[j].perceptual_weight +
+            hist->achv[j].adjusted_weight) / 2.0;
+      }
+
+      target_mse_overshoot = 1.0;
+      feedback_loop_trials -= 6;
+      // if error is really bad, it's unlikely to improve, so end sooner
+      if (total_error > least_error * 4)
+        feedback_loop_trials -= 3;
+      pam_freecolormap (newmap);
+    }
+
+    liq_verbose_printf (options, "  selecting colors...%d%%", 100 - MAX (0,
+            (int) (feedback_loop_trials / percent)));
+  }
+  while (feedback_loop_trials > 0);
+
+  *palette_error_p = least_error;
+  return acolormap;
+}
+
+/* Build the palette for `hist` and wrap it in a newly allocated liq_result.
+ *
+ * If the histogram (plus the image's fixed colours) already fits in
+ * max_colors and target_mse is 0, the histogram colours are used directly.
+ * Otherwise find_best_palette() is run, optionally followed by Voronoi
+ * iterations.
+ *
+ * Returns NULL if no palette could be built, if the palette error exceeds
+ * the allowed maximum, or if the result cannot be allocated. The palette is
+ * freed on each of those NULL returns that happens after it was created. */
+static liq_result *
+pngquant_quantize (histogram * hist, const liq_attr * options,
+    const liq_image * img)
+{
+  colormap *acolormap;
+  double palette_error = -1;    // stays negative until an error has been measured
+
+  // no point having perfect match with imperfect colors (ignorebits > 0)
+  const bool fast_palette = options->fast_palette || hist->ignorebits > 0;
+  const bool few_input_colors =
+      hist->size + img->fixed_colors_count <= options->max_colors;
+  liq_result *result;
+
+  // If image has few colors to begin with (and no quality degradation is required)
+  // then it's possible to skip quantization entirely
+  if (few_input_colors && options->target_mse == 0) {
+    acolormap = pam_colormap (hist->size, options->malloc, options->free);
+    for (unsigned int i = 0; i < hist->size; i++) {
+      acolormap->palette[i].acolor = hist->achv[i].acolor;
+      acolormap->palette[i].popularity = hist->achv[i].perceptual_weight;
+    }
+    acolormap =
+        add_fixed_colors_to_palette (acolormap, options->max_colors,
+        img->fixed_colors, img->fixed_colors_count, options->malloc,
+        options->free);
+    palette_error = 0;
+  } else {
+    const double max_mse = options->max_mse * (few_input_colors ? 0.33 : 1.0);  // when degrading image that's already paletted, require much higher improvement, since pal2pal often looks bad and there's little gain
+    const double iteration_limit = options->voronoi_iteration_limit;
+    unsigned int iterations = options->voronoi_iterations;
+
+    acolormap =
+        find_best_palette (hist, options, max_mse, img->fixed_colors,
+        img->fixed_colors_count, &palette_error);
+    if (!acolormap) {
+      return NULL;
+    }
+    // Voronoi iteration approaches local minimum for the palette
+    if (!iterations && palette_error < 0 && max_mse < MAX_DIFF)
+      iterations = 1;           // otherwise total error is never calculated and MSE limit won't work
+
+    if (iterations) {
+      double previous_palette_error = MAX_DIFF;
+      unsigned int i;
+
+      // likely_colormap_index (used and set in viter_do_iteration) can't point to index outside colormap
+      if (acolormap->colors < 256)
+        for (unsigned int j = 0; j < hist->size; j++) {
+          if (hist->achv[j].tmp.likely_colormap_index >= acolormap->colors) {
+            hist->achv[j].tmp.likely_colormap_index = 0;        // actual value doesn't matter, as the guess is out of date anyway
+          }
+        }
+
+      verbose_print (options, "  moving colormap towards local minimum");
+
+      for (i = 0; i < iterations; i++) {
+        palette_error =
+            viter_do_iteration (hist, acolormap, options->min_opaque_val, NULL,
+            i == 0 || options->fast_palette);
+
+        // stop once an iteration no longer changes the error noticeably
+        if (fabs (previous_palette_error - palette_error) < iteration_limit) {
+          break;
+        }
+
+        if (palette_error > max_mse * 1.5) {    // probably hopeless
+          if (palette_error > max_mse * 3.0)
+            break;              // definitely hopeless
+          i++;                  // use up the iteration budget twice as fast
+        }
+
+        previous_palette_error = palette_error;
+      }
+    }
+
+    if (palette_error > max_mse) {
+      liq_verbose_printf (options,
+          "  image degradation MSE=%.3f (Q=%d) exceeded limit of %.3f (%d)",
+          palette_error * 65536.0 / 6.0, mse_to_quality (palette_error),
+          max_mse * 65536.0 / 6.0, mse_to_quality (max_mse));
+      pam_freecolormap (acolormap);
+      return NULL;
+    }
+  }
+
+  sort_palette (acolormap, options);
+
+  result = options->malloc (sizeof (liq_result));
+  if (!result) {
+    // the palette has no other owner yet, so it must be released here
+    pam_freecolormap (acolormap);
+    return NULL;
+  }
+  *result = (liq_result) {
+  .magic_header = liq_result_magic,.malloc = options->malloc,.free =
+        options->free,.palette = acolormap,.palette_error =
+        palette_error,.fast_palette = fast_palette,.use_dither_map =
+        options->use_dither_map,.gamma =
+        img->gamma,.min_posterization_output =
+        options->min_posterization_output,};
+  return result;
+}
+
+/* Remap `input_image` into a single contiguous buffer of one byte per pixel,
+ * rows stored back to back.
+ *
+ * Returns LIQ_INVALID_POINTER if any argument fails its pointer check,
+ * LIQ_BUFFER_TOO_SMALL if `buffer_size` is less than width * height, and
+ * otherwise the result of liq_write_remapped_image_rows(). */
+LIQ_EXPORT liq_error
+liq_write_remapped_image (liq_result * result, liq_image * input_image,
+    void *buffer, size_t buffer_size)
+{
+  size_t required_size;
+  unsigned char **rows;
+  unsigned char *buffer_bytes;
+  unsigned i;
+
+  if (!CHECK_STRUCT_TYPE (result, liq_result)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!CHECK_STRUCT_TYPE (input_image, liq_image)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!CHECK_USER_POINTER (buffer)) {
+    return LIQ_INVALID_POINTER;
+  }
+
+  /* multiply in size_t: a product computed at the width of the image fields
+   * could wrap around and let an undersized buffer pass the check */
+  required_size = (size_t) input_image->width * input_image->height;
+  if (buffer_size < required_size) {
+    return LIQ_BUFFER_TOO_SMALL;
+  }
+
+  /* temporary table of row start pointers into the caller's buffer */
+  rows = g_alloca (sizeof (unsigned char *) * input_image->height);
+  buffer_bytes = buffer;
+  for (i = 0; i < input_image->height; i++) {
+    rows[i] = &buffer_bytes[(size_t) input_image->width * i];
+  }
+  return liq_write_remapped_image_rows (result, input_image, rows);
+}
+
+/* Remap `input_image` to the palette of `quant`, writing one palette index
+ * per pixel through `row_pointers`.
+ *
+ * Any previous remapping result attached to `quant` is destroyed and
+ * replaced. Returns LIQ_INVALID_POINTER if an argument or a row pointer
+ * fails its check, LIQ_OUT_OF_MEMORY if the remapping result cannot be
+ * created, and LIQ_OK otherwise. */
+LIQ_EXPORT liq_error
+liq_write_remapped_image_rows (liq_result * quant, liq_image * input_image,
+    unsigned char **row_pointers)
+{
+  liq_remapping_result *remap;
+  float plain_error;
+  unsigned int y;
+
+  if (!CHECK_STRUCT_TYPE (quant, liq_result)) {
+    return LIQ_INVALID_POINTER;
+  }
+  if (!CHECK_STRUCT_TYPE (input_image, liq_image)) {
+    return LIQ_INVALID_POINTER;
+  }
+
+  for (y = 0; y < input_image->height; y++) {
+    if (!CHECK_USER_POINTER (row_pointers + y)) {
+      return LIQ_INVALID_POINTER;
+    }
+    if (!CHECK_USER_POINTER (row_pointers[y])) {
+      return LIQ_INVALID_POINTER;
+    }
+  }
+
+  if (quant->remapping) {
+    liq_remapping_result_destroy (quant->remapping);
+  }
+
+  remap = liq_remapping_result_create (quant);
+  quant->remapping = remap;
+  if (!remap) {
+    return LIQ_OUT_OF_MEMORY;
+  }
+
+  /* contrast maps are only needed when a dither map is wanted and neither
+   * map exists yet */
+  if (quant->use_dither_map && !input_image->edges
+      && !input_image->dither_map) {
+    contrast_maps (input_image);
+  }
+
+  /*
+   ** Step 4: map the colors in the image to their closest match in the
+   ** new colormap, and write 'em out.
+   */
+
+  plain_error = remap->palette_error;
+  if (remap->dither_level != 0) {
+    const bool generate_dither_map = remap->use_dither_map
+        && (input_image->edges && !input_image->dither_map);
+
+    if (generate_dither_map) {
+      // the plain remapped image is used to find the areas that require dithering
+      plain_error =
+          remap_to_palette (input_image, row_pointers, remap->palette,
+          quant->fast_palette);
+      update_dither_map (row_pointers, input_image);
+    }
+    // remapping above was the last chance to do voronoi iteration, hence the final palette is set after remapping
+    set_rounded_palette (&remap->int_palette, remap->palette, remap->gamma,
+        quant->min_posterization_output);
+
+    remap_to_palette_floyd (input_image, row_pointers, remap->palette,
+        MAX (plain_error * 2.4, 16.f / 256.f), remap->use_dither_map,
+        generate_dither_map, remap->dither_level);
+  } else {
+    // no dithering requested: a single plain remap produces the output
+    set_rounded_palette (&remap->int_palette, remap->palette, remap->gamma,
+        quant->min_posterization_output);
+    plain_error =
+        remap_to_palette (input_image, row_pointers, remap->palette,
+        quant->fast_palette);
+  }
+
+  // remapping error from dithered image is absurd, so always non-dithered value is used
+  // palette_error includes some perceptual weighting from histogram which is closer correlated with dssim
+  // so that should be used when possible.
+  if (remap->palette_error < 0) {
+    remap->palette_error = plain_error;
+  }
+
+  return LIQ_OK;
+}
+
+/* Return the library version number, i.e. the LIQ_VERSION constant this
+ * file was compiled with. */
+LIQ_EXPORT int
+liq_version (void)
+{
+  const int version = LIQ_VERSION;
+
+  return version;
+}
diff --git a/gst/dvbsubenc/libimagequant/libimagequant.h b/gst/dvbsubenc/libimagequant/libimagequant.h
new file mode 100644
index 000000000..8b7e7bcf1
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/libimagequant.h
@@ -0,0 +1,113 @@
+/*
+ * http://pngquant.org
+ */
+
+#ifndef LIBIMAGEQUANT_H
+#define LIBIMAGEQUANT_H
+
+#ifndef LIQ_EXPORT
+#define LIQ_EXPORT extern
+#endif
+
+#define LIQ_VERSION 20401
+#define LIQ_VERSION_STRING "2.4.1"
+
+#ifndef LIQ_PRIVATE
+#if defined(__GNUC__) || defined (__llvm__)
+#define LIQ_PRIVATE __attribute__((visibility("hidden")))
+#else
+#define LIQ_PRIVATE
+#endif
+#endif
+
+/* Pull in other headers before opening the extern "C" block below: they
+ * manage their own linkage, and wrapping them in extern "C" breaks C++
+ * builds whenever a header declares C++-only constructs such as templates. */
+#include <stddef.h>
+#include <glib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque handle types; their layouts are not defined in this header. */
+typedef struct liq_attr liq_attr;
+typedef struct liq_image liq_image;
+typedef struct liq_result liq_result;
+
+/* One colour made of four 8-bit channels, laid out in r, g, b, a order. */
+typedef struct liq_color {
+    unsigned char r, g, b, a;
+} liq_color;
+
+/* A palette with storage for up to 256 colours.
+ * NOTE(review): `count` is presumably the number of valid entries - confirm
+ * against the implementation. */
+typedef struct liq_palette {
+    unsigned int count;
+    liq_color entries[256];
+} liq_palette;
+
+/* Status codes used by the liq_* functions that return liq_error.
+ * LIQ_OK is 0; the enumerators following LIQ_VALUE_OUT_OF_RANGE have no
+ * explicit value and therefore count upwards from 101. */
+typedef enum liq_error {
+    LIQ_OK = 0,
+    LIQ_QUALITY_TOO_LOW = 99,
+    LIQ_VALUE_OUT_OF_RANGE = 100,
+    LIQ_OUT_OF_MEMORY,          /* 101 */
+    LIQ_NOT_READY,              /* 102 */
+    LIQ_BITMAP_NOT_AVAILABLE,   /* 103 */
+    LIQ_BUFFER_TOO_SMALL,       /* 104 */
+    LIQ_INVALID_POINTER,        /* 105 */
+} liq_error;
+
+/* Distinct bit values (4 and 8), so they can be OR-ed together.
+ * NOTE(review): presumably the ownership_flags accepted by
+ * liq_image_set_memory_ownership() - confirm. */
+enum liq_ownership {LIQ_OWN_ROWS=4, LIQ_OWN_PIXELS=8};
+
+/* --- liq_attr lifecycle ---
+ * The _with_allocator variant takes the allocation and release functions as
+ * arguments. */
+LIQ_EXPORT liq_attr* liq_attr_create(void);
+LIQ_EXPORT liq_attr* liq_attr_create_with_allocator(void* (*malloc)(size_t), void (*free)(void*));
+LIQ_EXPORT liq_attr* liq_attr_copy(liq_attr *orig);
+LIQ_EXPORT void liq_attr_destroy(liq_attr *attr);
+
+/* --- liq_attr option setters and getters ---
+ * Setters that can reject a value report it through liq_error; getters take
+ * a const liq_attr. */
+LIQ_EXPORT liq_error liq_set_max_colors(liq_attr* attr, int colors);
+LIQ_EXPORT int liq_get_max_colors(const liq_attr* attr);
+LIQ_EXPORT liq_error liq_set_speed(liq_attr* attr, int speed);
+LIQ_EXPORT int liq_get_speed(const liq_attr* attr);
+LIQ_EXPORT liq_error liq_set_min_opacity(liq_attr* attr, int min);
+LIQ_EXPORT int liq_get_min_opacity(const liq_attr* attr);
+LIQ_EXPORT liq_error liq_set_min_posterization(liq_attr* attr, int bits);
+LIQ_EXPORT int liq_get_min_posterization(const liq_attr* attr);
+LIQ_EXPORT liq_error liq_set_quality(liq_attr* attr, int minimum, int maximum);
+LIQ_EXPORT int liq_get_min_quality(const liq_attr* attr);
+LIQ_EXPORT int liq_get_max_quality(const liq_attr* attr);
+LIQ_EXPORT void liq_set_last_index_transparent(liq_attr* attr, int is_last);
+
+/* --- logging hooks ---
+ * Each callback receives the liq_attr it was registered on together with the
+ * user_info pointer supplied at registration. */
+typedef void liq_log_callback_function(const liq_attr*, const char *message, void* user_info);
+typedef void liq_log_flush_callback_function(const liq_attr*, void* user_info);
+LIQ_EXPORT void liq_set_log_callback(liq_attr*, liq_log_callback_function*, void* user_info);
+LIQ_EXPORT void liq_set_log_flush_callback(liq_attr*, liq_log_flush_callback_function*, void* user_info);
+
+/* --- liq_image creation ---
+ * Input is given either as an array of row pointers or as one bitmap pointer. */
+LIQ_EXPORT liq_image *liq_image_create_rgba_rows(liq_attr *attr, void* rows[], int width, int height, double gamma);
+LIQ_EXPORT liq_image *liq_image_create_rgba(liq_attr *attr, void* bitmap, int width, int height, double gamma);
+
+/* Row-provider callback: is handed an output array of liq_color, a row number,
+ * the width and the user_info pointer. */
+typedef void liq_image_get_rgba_row_callback(liq_color row_out[], int row, int width, void* user_info);
+LIQ_EXPORT liq_image *liq_image_create_custom(liq_attr *attr, liq_image_get_rgba_row_callback *row_callback, void* user_info, int width, int height, double gamma);
+
+/* --- liq_image configuration, queries and destruction --- */
+LIQ_EXPORT liq_error liq_image_set_memory_ownership(liq_image *image, int ownership_flags);
+LIQ_EXPORT liq_error liq_image_add_fixed_color(liq_image *img, liq_color color);
+LIQ_EXPORT int liq_image_get_width(const liq_image *img);
+LIQ_EXPORT int liq_image_get_height(const liq_image *img);
+LIQ_EXPORT void liq_image_destroy(liq_image *img);
+
+/* --- quantization --- */
+LIQ_EXPORT liq_result *liq_quantize_image(liq_attr *options, liq_image *input_image);
+
+/* --- liq_result options --- */
+LIQ_EXPORT liq_error liq_set_dithering_level(liq_result *res, float dither_level);
+LIQ_EXPORT liq_error liq_set_output_gamma(liq_result* res, double gamma);
+LIQ_EXPORT double liq_get_output_gamma(const liq_result *result);
+
+LIQ_EXPORT const liq_palette *liq_get_palette(liq_result *result);
+
+/* --- writing the remapped image ---
+ * Output goes either to one buffer of buffer_size bytes or to an array of
+ * row pointers. */
+LIQ_EXPORT liq_error liq_write_remapped_image(liq_result *result, liq_image *input_image, void *buffer, size_t buffer_size);
+LIQ_EXPORT liq_error liq_write_remapped_image_rows(liq_result *result, liq_image *input_image, unsigned char **row_pointers);
+
+LIQ_EXPORT double liq_get_quantization_error(liq_result *result);
+LIQ_EXPORT int liq_get_quantization_quality(liq_result *result);
+
+LIQ_EXPORT void liq_result_destroy(liq_result *);
+/* Returns LIQ_VERSION. */
+LIQ_EXPORT int liq_version(void);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gst/dvbsubenc/libimagequant/mediancut.c b/gst/dvbsubenc/libimagequant/mediancut.c
new file mode 100644
index 000000000..c6f471f34
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/mediancut.c
@@ -0,0 +1,597 @@
+/*
+** Copyright (C) 1989, 1991 by Jef Poskanzer.
+** Copyright (C) 1997, 2000, 2002 by Greg Roelofs; based on an idea by
+**                                Stefan Schneider.
+** © 2009-2013 by Kornel Lesinski.
+**
+** Permission to use, copy, modify, and distribute this software and its
+** documentation for any purpose and without fee is hereby granted, provided
+** that the above copyright notice appear in all copies and that both that
+** copyright notice and this permission notice appear in supporting
+** documentation.  This software is provided "as is" without express or
+** implied warranty.
+*/
+
+#include <stdlib.h>
+#include <stddef.h>
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "mediancut.h"
+
+#define index_of_channel(ch) (offsetof(f_pixel,ch)/sizeof(float))
+
+static f_pixel averagepixels (unsigned int clrs, const hist_item achv[],
+    float min_opaque_val, const f_pixel center);
+
+/* One median-cut box: a contiguous run of histogram entries together with
+ * cached statistics about that run. */
+struct box
+{
+  f_pixel color;                /* representative colour, computed by averagepixels() */
+  f_pixel variance;             /* per-channel weighted variance, from box_variance() */
+  /* sum: total adjusted_weight of the entries in the box.
+   * total_error: result of box_error(); -1 means "not computed yet".
+   * max_error: largest colordifference to `color`, from box_max_error(). */
+  double sum, total_error, max_error;
+  unsigned int ind;             /* index of the box's first entry in the histogram array */
+  unsigned int colors;          /* number of histogram entries in the box */
+};
+
+ALWAYS_INLINE static double variance_diff (double val,
+    const double good_enough);
+/* Returns val squared; when that square is below good_enough squared the
+ * result is scaled down to a quarter, so small differences count less. */
+inline static double
+variance_diff (double val, const double good_enough)
+{
+  const double squared = val * val;
+  const double threshold = good_enough * good_enough;
+
+  return (squared < threshold) ? squared * 0.25 : squared;
+}
+
+/** Per-channel variance of the box's entries around the box colour, each entry
+ * weighted by its adjusted_weight and each channel scaled by a fixed factor.
+ * It's used to decide which channel to split by */
+static f_pixel
+box_variance (const hist_item achv[], const struct box *box)
+{
+  const f_pixel mean = box->color;
+  const hist_item *const entries = &achv[box->ind];
+  double var_a = 0, var_r = 0, var_g = 0, var_b = 0;
+
+  for (unsigned int n = 0; n < box->colors; n++) {
+    const f_pixel px = entries[n].acolor;
+    const double weight = entries[n].adjusted_weight;
+
+    /* alpha uses a wider "good enough" tolerance than the colour channels */
+    var_a += variance_diff (mean.a - px.a, 2.0 / 256.0) * weight;
+    var_r += variance_diff (mean.r - px.r, 1.0 / 256.0) * weight;
+    var_g += variance_diff (mean.g - px.g, 1.0 / 256.0) * weight;
+    var_b += variance_diff (mean.b - px.b, 1.0 / 256.0) * weight;
+  }
+
+  return (f_pixel) {
+    .a = var_a * (4.0 / 16.0),
+    .r = var_r * (7.0 / 16.0),
+    .g = var_g * (9.0 / 16.0),
+    .b = var_b * (5.0 / 16.0),
+  };
+}
+
+/* Largest colordifference between the box colour and any entry in the box
+ * (0 for an empty box). */
+static double
+box_max_error (const hist_item achv[], const struct box *box)
+{
+  const f_pixel mean = box->color;
+  const hist_item *const entries = &achv[box->ind];
+  double worst = 0;
+
+  for (unsigned int n = 0; n < box->colors; n++) {
+    const double diff = colordifference (mean, entries[n].acolor);
+
+    if (diff > worst)
+      worst = diff;
+  }
+
+  return worst;
+}
+
+ALWAYS_INLINE static double color_weight (f_pixel median, hist_item h);
+
+/* Exchanges two histogram entries; does nothing when both pointers are equal. */
+static inline void
+hist_item_swap (hist_item * l, hist_item * r)
+{
+  hist_item saved;
+
+  if (l == r)
+    return;
+
+  saved = *l;
+  *l = *r;
+  *r = saved;
+}
+
+ALWAYS_INLINE static unsigned int qsort_pivot (const hist_item * const base,
+    const unsigned int len);
+/* Chooses a pivot index: the middle element for fewer than 32 items, otherwise
+ * the index holding the median sort_value of the items at 8, len/2 and len-1. */
+inline static unsigned int
+qsort_pivot (const hist_item * const base, const unsigned int len)
+{
+  unsigned int first_idx, mid_idx, last_idx;
+  unsigned int first, mid, last;
+
+  if (len < 32)
+    return len / 2;
+
+  first_idx = 8;
+  mid_idx = len / 2;
+  last_idx = len - 1;
+
+  first = base[first_idx].tmp.sort_value;
+  mid = base[mid_idx].tmp.sort_value;
+  last = base[last_idx].tmp.sort_value;
+
+  if (first < mid) {
+    if (mid < last)
+      return mid_idx;
+    return (first < last) ? last_idx : first_idx;
+  }
+
+  if (mid > last)
+    return mid_idx;
+  return (first < last) ? first_idx : last_idx;
+}
+
+ALWAYS_INLINE static unsigned int qsort_partition (hist_item * const base,
+    const unsigned int len);
+/* Partitions base[0..len) around a pivot in DESCENDING sort_value order and
+ * returns the pivot's final index: entries before that index have
+ * sort_value >= pivot, entries after it have sort_value <= pivot.
+ * For len >= 8 the pivot comes from qsort_pivot(); for shorter ranges base[0]
+ * is used as it is. */
+inline static unsigned int
+qsort_partition (hist_item * const base, const unsigned int len)
+{
+  unsigned int l = 1, r = len;
+  if (len >= 8) {
+    /* park the chosen pivot in slot 0 while the rest is partitioned */
+    hist_item_swap (&base[0], &base[qsort_pivot (base, len)]);
+  }
+
+  {
+    const unsigned int pivot_value = base[0].tmp.sort_value;
+    while (l < r) {
+      if (base[l].tmp.sort_value >= pivot_value) {
+        l++;
+      } else {
+        /* base[l] belongs on the right: walk r leftwards past entries that
+         * may stay on the right, then exchange (a no-op once r meets l) */
+        while (l < --r && base[r].tmp.sort_value <= pivot_value) {
+        }
+        hist_item_swap (&base[l], &base[r]);
+      }
+    }
+    /* l is now one past the last entry of the left part; move the pivot there */
+    l--;
+    hist_item_swap (&base[0], &base[l]);
+  }
+
+  return l;
+}
+
+/** quick select algorithm
+ *
+ * Repeatedly partitions base[0..len) with qsort_partition(), each time keeping
+ * only the side that contains index sort_start, and stops as soon as
+ * sort_start is neither strictly left of the pivot nor strictly right of the
+ * slot after the pivot (or that side is empty). Note the loop also stops when
+ * sort_start equals pivot + 1 without partitioning the right side any further.
+ */
+static void
+hist_item_sort_range (hist_item * base, unsigned int len,
+    unsigned int sort_start)
+{
+  for (;;) {
+    const unsigned int l = qsort_partition (base, len), r = l + 1;
+
+    if (l > 0 && sort_start < l) {
+      /* target lies in the left part: shrink the range to [0, l) */
+      len = l;
+    } else if (r < len && sort_start > r) {
+      /* target lies in the right part: rebase the range to start at r */
+      base += r;
+      len -= r;
+      sort_start -= r;
+    } else
+      break;
+  }
+}
+
+/** sorts array to make sum of weights lower than halfvar one side, returns edge between <halfvar and >halfvar parts of the set
+ *
+ * Partially sorts base[0..len) with qsort_partition() while adding the entries'
+ * color_weight to *lowervar in array order. Returns a pointer to the entry at
+ * which the running sum first becomes greater than halfvar, or NULL when the
+ * sum never exceeds halfvar.
+ *
+ * NOTE(review): when nothing remains to the right of the pivot (r == len) the
+ * code reads base[r], i.e. the entry just past the range it was given. For a
+ * recursive call on a left part that entry is the parent's pivot; for the
+ * outermost call it is whatever follows the caller's range - confirm that this
+ * is always valid memory.
+ * NOTE(review): callers must be prepared for the NULL return.
+ */
+static hist_item *
+hist_item_sort_halfvar (hist_item * base, unsigned int len,
+    double *const lowervar, const double halfvar)
+{
+  do {
+    const unsigned int l = qsort_partition (base, len), r = l + 1;
+
+    // check if sum of left side is smaller than half,
+    // if it is, then it doesn't need to be sorted
+    unsigned int t = 0;
+    double tmpsum = *lowervar;
+    while (t <= l && tmpsum < halfvar)
+      tmpsum += base[t++].color_weight;
+
+    if (tmpsum < halfvar) {
+      /* left part including the pivot stays below the target: take it in bulk */
+      *lowervar = tmpsum;
+    } else {
+      if (l > 0) {
+        /* the edge is somewhere in [0, l]: refine by recursing into the left part */
+        hist_item *res = hist_item_sort_halfvar (base, l, lowervar, halfvar);
+        if (res)
+          return res;
+      } else {
+        // End of left recursion. This will be executed in order from the first element.
+        *lowervar += base[0].color_weight;
+        if (*lowervar > halfvar)
+          return &base[0];
+      }
+    }
+
+    if (len > r) {
+      base += r;
+      len -= r;                 // tail-recursive "call"
+    } else {
+      /* right part is empty; see the NOTE(review) above about base[r] */
+      *lowervar += base[r].color_weight;
+      return (*lowervar > halfvar) ? &base[r] : NULL;
+    }
+  } while (1);
+}
+
+static f_pixel get_median (const struct box *b, hist_item achv[]);
+
+/* Pairs a channel with its variance so channels can be ordered by variance. */
+typedef struct
+{
+  unsigned int chan;            /* index of the channel inside f_pixel, counted in floats */
+  float variance;               /* variance of that channel within a box */
+} channelvariance;
+
+/* qsort() comparator for channelvariance: orders by variance, largest first. */
+static int
+comparevariance (const void *ch1, const void *ch2)
+{
+  const float lhs = ((const channelvariance *) ch1)->variance;
+  const float rhs = ((const channelvariance *) ch2)->variance;
+
+  if (lhs > rhs)
+    return -1;
+  if (lhs < rhs)
+    return 1;
+  return 0;
+}
+
+/** Finds which channels need to be sorted first and preprocesses achv for fast sort
+ *
+ * For every entry of box b this stores a packed integer key in tmp.sort_value,
+ * then finds the box median (which partially reorders the entries) and stores
+ * each entry's color_weight relative to that median.
+ * Returns half of the summed color_weight of the box.
+ */
+static double
+prepare_sort (struct box *b, hist_item achv[])
+{
+  /*
+   ** Sort dimensions by their variance, and then sort colors first by dimension with highest variance
+   */
+  double totalvar = 0;
+  channelvariance channels[4] = {
+    {index_of_channel (r), b->variance.r},
+    {index_of_channel (g), b->variance.g},
+    {index_of_channel (b), b->variance.b},
+    {index_of_channel (a), b->variance.a},
+  };
+
+  /* comparevariance orders descending, so channels[0] has the highest variance */
+  qsort (channels, 4, sizeof (channels[0]), comparevariance);
+
+  for (unsigned int i = 0; i < b->colors; i++) {
+    /* view the colour as an array of floats so channels can be picked by index */
+    const float *chans = (const float *) &achv[b->ind + i].acolor;
+    // Only the first channel really matters. When trying median cut many times
+    // with different histogram weights, I don't want sort randomness to influence outcome.
+    /* Key layout: highest-variance channel scaled by 65535 and shifted into the
+     * upper 16 bits, OR-ed with a weighted mix of the other three channels
+     * scaled by 65535.
+     * NOTE(review): if channel values range up to 1.0 that mix can exceed
+     * 65535 and reach into the upper half of the key - confirm this is
+     * acceptable. */
+    achv[b->ind + i].tmp.sort_value =
+        ((unsigned int) (chans[channels[0].chan] *
+            65535.0) << 16) | (unsigned int) ((chans[channels[2].chan] +
+            chans[channels[1].chan] / 2.0 +
+            chans[channels[3].chan] / 4.0) * 65535.0);
+  }
+
+  {
+    const f_pixel median = get_median (b, achv);
+
+    // box will be split to make color_weight of each side even
+    const unsigned int ind = b->ind, end = ind + b->colors;
+    for (unsigned int j = ind; j < end; j++)
+      totalvar += (achv[j].color_weight = color_weight (median, achv[j]));
+  }
+  return totalvar / 2.0;
+}
+
+/** finds median in unsorted set by sorting only minimum required
+ *
+ * For an odd number of entries the colour of the middle entry is returned.
+ * For an even number the two entries starting at the lower-middle position
+ * are averaged with averagepixels().
+ */
+static f_pixel
+get_median (const struct box *b, hist_item achv[])
+{
+  const unsigned int median_start = (b->colors - 1) / 2;
+  hist_item *const entries = &achv[b->ind];
+
+  hist_item_sort_range (entries, b->colors, median_start);
+
+  if ((b->colors & 1) == 0) {
+    // technically the second color is not guaranteed to be sorted correctly
+    // but most of the time it is good enough to be useful
+    return averagepixels (2, &entries[median_start], 1.0, (f_pixel) {
+        0.5, 0.5, 0.5, 0.5}
+    );
+  }
+
+  return entries[median_start].acolor;
+}
+
+/*
+ ** Find the best splittable box. -1 if no boxes are splittable.
+ **
+ ** A box is splittable when it holds at least two colours. Boxes are scored by
+ ** their weight sum times their largest channel variance; boxes whose
+ ** max_error exceeds max_mse get their score scaled up by max_error / max_mse.
+ ** The highest strictly positive score wins.
+ */
+static int
+best_splittable_box (struct box *bv, unsigned int boxes, const double max_mse)
+{
+  int best = -1;
+  double best_score = 0;
+
+  for (unsigned int n = 0; n < boxes; n++) {
+    const struct box *const candidate = &bv[n];
+    double colour_variance, score;
+
+    if (candidate->colors < 2)
+      continue;
+
+    // looks only at max variance, because it's only going to split by it
+    colour_variance =
+        MAX (candidate->variance.r, MAX (candidate->variance.g,
+            candidate->variance.b));
+    score = candidate->sum * MAX (candidate->variance.a, colour_variance);
+
+    if (candidate->max_error > max_mse)
+      score = score * candidate->max_error / max_mse;
+
+    if (score > best_score) {
+      best_score = score;
+      best = n;
+    }
+  }
+
+  return best;
+}
+
+/* Weight of a histogram entry for balancing a split: square root of its
+ * colordifference from the median, times (sqrt(1 + adjusted_weight) - 1).
+ * Differences below 2/65536 are halved first. */
+inline static double
+color_weight (f_pixel median, hist_item h)
+{
+  const float good_enough = 2.f / 256.f / 256.f;
+  float diff = colordifference (median, h.acolor);
+  double popularity;
+
+  // if color is "good enough", don't split further
+  if (diff < good_enough)
+    diff /= 2.f;
+
+  popularity = sqrt (1.0 + h.adjusted_weight) - 1.0;
+  return sqrt (diff) * popularity;
+}
+
+static void set_colormap_from_boxes (colormap * map, struct box *bv,
+    unsigned int boxes, hist_item * achv);
+static void adjust_histogram (hist_item * achv, const colormap * map,
+    const struct box *bv, unsigned int boxes);
+
+/* Sum over the box's entries of their colordifference from the box colour,
+ * each multiplied by the entry's perceptual_weight. */
+static double
+box_error (const struct box *box, const hist_item achv[])
+{
+  const f_pixel avg = box->color;
+  const hist_item *const entries = &achv[box->ind];
+  double accumulated = 0;
+
+  for (unsigned int n = 0; n < box->colors; n++) {
+    accumulated +=
+        colordifference (avg, entries[n].acolor) * entries[n].perceptual_weight;
+  }
+
+  return accumulated;
+}
+
+
+static bool
+total_box_error_below_target (double target_mse, struct box bv[],
+    unsigned int boxes, const histogram * hist)
+{
+  double total_error = 0;
+  unsigned int i;
+
+  target_mse *= hist->total_perceptual_weight;
+
+  for (i = 0; i < boxes; i++) {
+    // error is (re)calculated lazily
+    if (bv[i].total_error >= 0) {
+      total_error += bv[i].total_error;
+    }
+    if (total_error > target_mse)
+      return false;
+  }
+
+  for (i = 0; i < boxes; i++) {
+    if (bv[i].total_error < 0) {
+      bv[i].total_error = box_error (&bv[i], hist->achv);
+      total_error += bv[i].total_error;
+    }
+    if (total_error > target_mse)
+      return false;
+  }
+
+  return true;
+}
+
+/*
+ ** Here is the fun part, the median-cut colormap generator.  This is based
+ ** on Paul Heckbert's paper, "Color Image Quantization for Frame Buffer
+ ** Display," SIGGRAPH 1982 Proceedings, page 297.
+ **
+ ** hist            histogram to quantize; its entries are reordered, and their
+ **                 color_weight, adjusted_weight and tmp fields are rewritten.
+ ** min_opaque_val  forwarded to averagepixels() when computing box colours.
+ ** newcolors       maximum number of boxes (palette entries) to produce. Must
+ **                 be at least 1, since box 0 is always written.
+ ** target_mse      splitting stops once total_box_error_below_target() holds.
+ ** max_mse         boxes whose max_error exceeds a limit derived from this
+ **                 value are preferred for splitting.
+ ** malloc, free    allocator pair handed to pam_colormap().
+ **
+ ** Returns a new colormap with one entry per box. Its subset_palette is the
+ ** colormap captured when the box count reached ceil(newcolors^0.7), or NULL
+ ** if splitting stopped before that.
+ */
+LIQ_PRIVATE colormap *
+mediancut (histogram * hist, const float min_opaque_val, unsigned int newcolors,
+    const double target_mse, const double max_mse, void *(*malloc) (size_t),
+    void (*free) (void *))
+{
+  hist_item *achv = hist->achv;
+  struct box *bv = g_alloca (sizeof (struct box) * newcolors);
+  unsigned int i, boxes, subset_size;
+  colormap *representative_subset = NULL;
+  colormap *map;
+
+  /*
+   ** Set up the initial box.
+   */
+  bv[0].ind = 0;
+  bv[0].colors = hist->size;
+  bv[0].color =
+      averagepixels (bv[0].colors, &achv[bv[0].ind], min_opaque_val, (f_pixel) {
+      0.5, 0.5, 0.5, 0.5});
+  bv[0].variance = box_variance (achv, &bv[0]);
+  bv[0].max_error = box_max_error (achv, &bv[0]);
+  bv[0].sum = 0;
+  bv[0].total_error = -1;
+  for (i = 0; i < bv[0].colors; i++)
+    bv[0].sum += achv[i].adjusted_weight;
+
+  boxes = 1;
+
+  // remember smaller palette for fast searching
+  subset_size = ceilf (powf (newcolors, 0.7f));
+
+  /*
+   ** Main loop: split boxes until we have enough.
+   */
+  while (boxes < newcolors) {
+    unsigned int indx, clrs;
+    unsigned int break_at;
+    double lowervar = 0, halfvar, current_max_mse;
+    hist_item *break_p;
+    double sm, lowersum;
+    int bi;
+    f_pixel previous_center;
+
+    if (boxes == subset_size) {
+      representative_subset = pam_colormap (boxes, malloc, free);
+      set_colormap_from_boxes (representative_subset, bv, boxes, achv);
+    }
+    // first splits boxes that exceed quality limit (to have colors for things like odd green pixel),
+    // later raises the limit to allow large smooth areas/gradients get colors.
+    current_max_mse = max_mse + (boxes / (double) newcolors) * 16.0 * max_mse;
+    bi = best_splittable_box (bv, boxes, current_max_mse);
+    if (bi < 0)
+      break;                    /* ran out of colors! */
+
+    indx = bv[bi].ind;
+    clrs = bv[bi].colors;
+
+    /*
+       Classic implementation tries to get even number of colors or pixels in each subdivision.
+
+       Here, instead of popularity I use (sqrt(popularity)*variance) metric.
+       Each subdivision balances number of pixels (popular colors) and low variance -
+       boxes can be large if they have similar colors. Later boxes with high variance
+       will be more likely to be split.
+
+       Median used as expected value gives much better results than mean.
+     */
+    halfvar = prepare_sort (&bv[bi], achv);
+
+    // hist_item_sort_halfvar sorts and sums lowervar at the same time
+    // returns item to break at …minus one, which does smell like an off-by-one error.
+    break_p = hist_item_sort_halfvar (&achv[indx], clrs, &lowervar, halfvar);
+    if (break_p != NULL) {
+      break_at = MIN (clrs - 1, break_p - &achv[indx] + 1);
+    } else {
+      // The running sum never exceeded halfvar, so there is no edge inside the
+      // box. Doing pointer arithmetic on NULL would be undefined; split off
+      // the last colour instead, which is also the upper clamp used above.
+      break_at = clrs - 1;
+    }
+
+    /*
+     ** Split the box.
+     */
+    sm = bv[bi].sum;
+    lowersum = 0;
+    for (i = 0; i < break_at; i++)
+      lowersum += achv[indx + i].adjusted_weight;
+
+    // lower part stays in slot bi, upper part goes into the new slot
+    previous_center = bv[bi].color;
+    bv[bi].colors = break_at;
+    bv[bi].sum = lowersum;
+    bv[bi].color =
+        averagepixels (bv[bi].colors, &achv[bv[bi].ind], min_opaque_val,
+        previous_center);
+    bv[bi].total_error = -1;
+    bv[bi].variance = box_variance (achv, &bv[bi]);
+    bv[bi].max_error = box_max_error (achv, &bv[bi]);
+    bv[boxes].ind = indx + break_at;
+    bv[boxes].colors = clrs - break_at;
+    bv[boxes].sum = sm - lowersum;
+    bv[boxes].color =
+        averagepixels (bv[boxes].colors, &achv[bv[boxes].ind], min_opaque_val,
+        previous_center);
+    bv[boxes].total_error = -1;
+    bv[boxes].variance = box_variance (achv, &bv[boxes]);
+    bv[boxes].max_error = box_max_error (achv, &bv[boxes]);
+
+    ++boxes;
+
+    if (total_box_error_below_target (target_mse, bv, boxes, hist)) {
+      break;
+    }
+  }
+
+  map = pam_colormap (boxes, malloc, free);
+  set_colormap_from_boxes (map, bv, boxes, achv);
+
+  map->subset_palette = representative_subset;
+  adjust_histogram (achv, map, bv, boxes);
+
+  return map;
+}
+
+/* Fills the first `boxes` palette entries of map from the boxes: the colour is
+ * the box colour, the popularity is the summed perceptual_weight of the
+ * histogram entries the box covers. */
+static void
+set_colormap_from_boxes (colormap * map, struct box *bv, unsigned int boxes,
+    hist_item * achv)
+{
+  /*
+   ** Ok, we've got enough boxes.  Now choose a representative color for
+   ** each box.  There are a number of possible ways to make this choice.
+   ** One would be to choose the center of the box; this ignores any structure
+   ** within the boxes.  Another method would be to average all the colors in
+   ** the box - this is the method specified in Heckbert's paper.
+   */
+
+  for (unsigned int n = 0; n < boxes; n++) {
+    const struct box *const box = &bv[n];
+    const unsigned int first = box->ind;
+    const unsigned int past_last = box->ind + box->colors;
+
+    map->palette[n].acolor = box->color;
+
+    /* store total color popularity (perceptual_weight is approximation of it) */
+    map->palette[n].popularity = 0;
+    for (unsigned int k = first; k < past_last; k++)
+      map->palette[n].popularity += achv[k].perceptual_weight;
+  }
+}
+
+/* increase histogram popularity by difference from the final color (this is used as part of feedback loop)
+ *
+ * Every entry's adjusted_weight is multiplied by sqrt(1 + difference / 4),
+ * where difference is its colordifference from the palette colour of its box,
+ * and the entry records that box as its likely colormap index. */
+static void
+adjust_histogram (hist_item * achv, const colormap * map, const struct box *bv,
+    unsigned int boxes)
+{
+  for (unsigned int n = 0; n < boxes; n++) {
+    const unsigned int past_last = bv[n].ind + bv[n].colors;
+
+    for (unsigned int k = bv[n].ind; k < past_last; k++) {
+      hist_item *const entry = &achv[k];
+
+      entry->adjusted_weight *=
+          sqrt (1.0 + colordifference (map->palette[n].acolor,
+              entry->acolor) / 4.0);
+      entry->tmp.likely_colormap_index = n;
+    }
+  }
+}
+
+/* Weighted average colour of the first `clrs` entries of achv.
+ *
+ * The alpha of the result is the mean of the entries' alpha weighted by
+ * adjusted_weight; it is snapped to 1 when it reaches min_opaque_val and at
+ * least one entry has alpha >= 255/256. The colour channels are averaged with
+ * a weight of adjusted_weight * (1 + squared r/g/b distance from `center`),
+ * after dividing each entry's channels by its own alpha (when non-zero) and
+ * multiplying by the result alpha.
+ * NOTE(review): the divide-by-alpha step suggests the stored channels are
+ * premultiplied by alpha - confirm against the f_pixel conversion code. */
+static f_pixel
+averagepixels (unsigned int clrs, const hist_item achv[],
+    const float min_opaque_val, const f_pixel center)
+{
+  double r = 0, g = 0, b = 0, a = 0, new_a = 0, sum = 0;
+  float maxa = 0;
+
+  // first find final opacity in order to blend colors at that opacity
+  for (unsigned int i = 0; i < clrs; ++i) {
+    const f_pixel px = achv[i].acolor;
+    new_a += px.a * achv[i].adjusted_weight;
+    sum += achv[i].adjusted_weight;
+
+    /* find if there are opaque colors, in case we're supposed to preserve opacity exactly (ie_bug) */
+    if (px.a > maxa)
+      maxa = px.a;
+  }
+
+  /* a zero weight sum leaves new_a at 0 instead of dividing by zero */
+  if (sum)
+    new_a /= sum;
+
+    /** if there was at least one completely opaque color, "round" final color to opaque */
+  if (new_a >= min_opaque_val && maxa >= (255.0 / 256.0))
+    new_a = 1;
+
+  sum = 0;
+  // reverse iteration for cache locality with previous loop
+  for (int i = clrs - 1; i >= 0; i--) {
+    double tmp, weight = 1.0f;
+    f_pixel px = achv[i].acolor;
+
+    /* give more weight to colors that are further away from average
+       this is intended to prevent desaturation of images and fading of whites
+     */
+    tmp = (center.r - px.r);
+    weight += tmp * tmp;
+    tmp = (center.g - px.g);
+    weight += tmp * tmp;
+    tmp = (center.b - px.b);
+    weight += tmp * tmp;
+
+    weight *= achv[i].adjusted_weight;
+    sum += weight;
+
+    /* undo the entry's own alpha scaling; skipped when alpha is zero */
+    if (px.a) {
+      px.r /= px.a;
+      px.g /= px.a;
+      px.b /= px.a;
+    }
+
+    /* re-apply the common result alpha while accumulating */
+    r += px.r * new_a * weight;
+    g += px.g * new_a * weight;
+    b += px.b * new_a * weight;
+    a += new_a * weight;
+  }
+
+  if (sum) {
+    a /= sum;
+    r /= sum;
+    g /= sum;
+    b /= sum;
+  }
+
+  assert (!isnan (r) && !isnan (g) && !isnan (b) && !isnan (a));
+
+  return (f_pixel) {
+  .r = r,.g = g,.b = b,.a = a};
+}
diff --git a/gst/dvbsubenc/libimagequant/mediancut.h b/gst/dvbsubenc/libimagequant/mediancut.h
new file mode 100644
index 000000000..e615c8dc9
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/mediancut.h
@@ -0,0 +1,2 @@
+
+LIQ_PRIVATE colormap *mediancut(histogram *hist, const float min_opaque_val, unsigned int newcolors, const double target_mse, const double max_mse, void* (*malloc)(size_t), void (*free)(void*));
diff --git a/gst/dvbsubenc/libimagequant/mempool.c b/gst/dvbsubenc/libimagequant/mempool.c
new file mode 100644
index 000000000..c2777d831
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/mempool.c
@@ -0,0 +1,69 @@
+
+#include "libimagequant.h"
+#include "mempool.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+
+#define ALIGN_MASK 15UL
+#define MEMPOOL_RESERVED ((sizeof(struct mempool)+ALIGN_MASK) & ~ALIGN_MASK)
+
+/* Header placed at the start of every pool block; the usable memory follows
+ * it inside the same allocation. */
+struct mempool
+{
+  /* used: bytes consumed from the start of the block, header included.
+   * size: total size of the block in bytes, header included. */
+  unsigned int used, size;
+  void *(*malloc) (size_t);     /* allocator used when another block is needed */
+  void (*free) (void *);        /* releases a block in mempool_destroy() */
+  struct mempool *next;         /* block that was current before this one, or NULL */
+};
+/* Returns `size` bytes of pool memory, creating a new pool block when *mptr
+ * is NULL or the current block cannot hold the request.
+ *
+ * mptr      in/out: current (newest) block; updated when a block is created.
+ * size      number of bytes requested.
+ * max_size  payload capacity for a newly created block; 0 selects 1 << 17.
+ *           It is raised to size + ALIGN_MASK when the request is larger.
+ * malloc, free  allocator pair stored in the new block.
+ *
+ * Returns NULL when a new block cannot be allocated. In that case *mptr is
+ * left untouched, so blocks created earlier stay reachable and can still be
+ * released with mempool_destroy().
+ */
+LIQ_PRIVATE void *
+mempool_create (mempool * mptr, const unsigned int size, unsigned int max_size,
+    void *(*malloc) (size_t), void (*free) (void *))
+{
+  mempool fresh;
+  uintptr_t used_start;
+
+  /* fast path: the request fits into the current block */
+  if (*mptr && ((*mptr)->used + size) <= (*mptr)->size) {
+    unsigned int prevused = (*mptr)->used;
+    (*mptr)->used += (size + ALIGN_MASK) & ~ALIGN_MASK;
+    return ((char *) (*mptr)) + prevused;
+  }
+
+  if (!max_size)
+    max_size = (1 << 17);
+  max_size = size + ALIGN_MASK > max_size ? size + ALIGN_MASK : max_size;
+
+  /* Allocate into a temporary: overwriting *mptr before the result is known
+   * would drop the only reference to the existing chain on failure. */
+  fresh = malloc (MEMPOOL_RESERVED + max_size);
+  if (!fresh)
+    return NULL;
+
+  *fresh = (struct mempool) {
+    .malloc = malloc,
+    .free = free,
+    .size = MEMPOOL_RESERVED + max_size,
+    .used = sizeof (struct mempool),
+    .next = *mptr,
+  };
+  used_start = (uintptr_t) fresh + fresh->used;
+  fresh->used += (ALIGN_MASK + 1 - (used_start & ALIGN_MASK)) & ALIGN_MASK;     // reserve bytes required to make subsequent allocations aligned
+  assert (!(((uintptr_t) fresh + fresh->used) & ALIGN_MASK));
+
+  *mptr = fresh;
+
+  return mempool_alloc (mptr, size, size);
+}
+
+/* Hands out `size` bytes from the current block at *mptr, advancing its used
+ * counter by the size rounded up to the alignment. When the block is too full
+ * the request is passed on to mempool_create() with the block's allocator. */
+LIQ_PRIVATE void *
+mempool_alloc (mempool * mptr, unsigned int size, unsigned int max_size)
+{
+  mempool pool = *mptr;
+
+  if ((pool->used + size) > pool->size)
+    return mempool_create (mptr, size, max_size, pool->malloc, pool->free);
+
+  {
+    char *const chunk = ((char *) pool) + pool->used;
+
+    pool->used += (size + ALIGN_MASK) & ~ALIGN_MASK;
+    return chunk;
+  }
+}
+
+/* Releases block m and every block linked behind it, each with the free
+ * function stored in that block. A NULL m is accepted. */
+LIQ_PRIVATE void
+mempool_destroy (mempool m)
+{
+  mempool following;
+
+  for (; m; m = following) {
+    /* read the link before the block is released */
+    following = m->next;
+    m->free (m);
+  }
+}
diff --git a/gst/dvbsubenc/libimagequant/mempool.h b/gst/dvbsubenc/libimagequant/mempool.h
new file mode 100644
index 000000000..e61b8dd08
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/mempool.h
@@ -0,0 +1,13 @@
+#ifndef MEMPOOL_H
+#define MEMPOOL_H
+
+#include <stddef.h>
+
+/* `mempool` is a POINTER to the pool's newest block; functions that may add a
+ * block therefore take a `mempool *`. */
+struct mempool;
+typedef struct mempool *mempool;
+
+/* Returns `size` bytes, creating a block when *mptr is NULL or full.
+ * `capacity` is the payload size of a newly created block (0 selects a
+ * default). Returns NULL if a block cannot be allocated. */
+LIQ_PRIVATE void* mempool_create(mempool *mptr, unsigned int size, unsigned int capacity, void* (*malloc)(size_t), void (*free)(void*));
+/* Returns `size` bytes from an existing pool (*mptr must not be NULL), adding
+ * a block of `capacity` bytes when the current one is full. */
+LIQ_PRIVATE void* mempool_alloc(mempool *mptr, unsigned int size, unsigned int capacity);
+/* Frees every block of the pool; NULL is accepted. */
+LIQ_PRIVATE void mempool_destroy(mempool m);
+
+#endif
diff --git a/gst/dvbsubenc/libimagequant/nearest.c b/gst/dvbsubenc/libimagequant/nearest.c
new file mode 100644
index 000000000..c3e062cab
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/nearest.c
@@ -0,0 +1,261 @@
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "nearest.h"
+#include "mempool.h"
+#include <stdlib.h>
+
+/* Scratch record used by build_head() to order palette entries by distance. */
+struct sorttmp
+{
+  float radius;                 /* colordifference between the vantage point and the entry */
+  unsigned int index;           /* index of the entry in the colormap's palette */
+};
+
+/* One search bucket: a vantage point plus the palette entries closest to it. */
+struct head
+{
+  // colors less than radius away from vantage_point color will have best match in candidates
+  f_pixel vantage_point;
+  float radius;
+  unsigned int num_candidates;  /* length of both candidate arrays below */
+  f_pixel *candidates_color;    /* candidate colours, nearest to the vantage point first */
+  unsigned short *candidates_index;     /* palette index of each candidate, parallel to candidates_color */
+};
+
+/* Search structure built by nearest_init() for one colormap. */
+struct nearest_map
+{
+  const colormap *map;          /* colormap being searched; not owned */
+  /* per palette entry: a quarter of the colordifference to its nearest other
+   * entry. NOTE(review): fixed at 256 slots, so the colormap is presumably
+   * limited to 256 colours - confirm. */
+  float nearest_other_color_dist[256];
+  mempool mempool;              /* pool holding this struct and all candidate arrays */
+  struct head heads[];          /* vantage-point heads followed by one fallback head */
+};
+
+/* Smallest colordifference between palette entry i and any other entry of the
+ * map; MAX_DIFF when there is no other entry. */
+static float
+distance_from_nearest_other_color (const colormap * map, const unsigned int i)
+{
+  float nearest = MAX_DIFF;
+  unsigned int other;
+
+  for (other = 0; other < map->colors; other++) {
+    if (other != i) {
+      const float diff =
+          colordifference (map->palette[i].acolor, map->palette[other].acolor);
+
+      if (diff <= nearest)
+        nearest = diff;
+    }
+  }
+
+  return nearest;
+}
+
+/* qsort() comparator for struct sorttmp: orders by radius, smallest first. */
+static int
+compareradius (const void *ap, const void *bp)
+{
+  const struct sorttmp *const lhs = ap;
+  const struct sorttmp *const rhs = bp;
+
+  /* yields 1, -1 or 0 exactly like a chained comparison */
+  return (lhs->radius > rhs->radius) - (lhs->radius < rhs->radius);
+}
+
+/* Builds one search head around vantage point px.
+ *
+ * All palette entries not yet marked in skip_index are sorted by their
+ * colordifference from px; the nearest num_candidates of them (fewer if not
+ * enough remain) are copied into arrays allocated from pool *m. Candidates
+ * lying well inside the head's radius are then marked in skip_index, and
+ * *skipped is incremented for each, so later heads leave them out.
+ *
+ * NOTE(review): if no entry remains (num_candidates becomes 0) the radius
+ * computation indexes candidates_color[num_candidates - 1] - confirm callers
+ * can never reach that state.
+ * NOTE(review): the results of mempool_alloc() are used without a NULL check.
+ */
+static struct head
+build_head (f_pixel px, const colormap * map, unsigned int num_candidates,
+    mempool * m, float error_margin, bool skip_index[], unsigned int *skipped)
+{
+  struct sorttmp *colors = g_alloca (sizeof (struct sorttmp) * map->colors);
+  unsigned int colorsused, i;
+  struct head h;
+
+  colorsused = 0;
+
+  for (i = 0; i < map->colors; i++) {
+    if (skip_index[i])
+      continue;                 // colors in skip_index have been eliminated already in previous heads
+    colors[colorsused].index = i;
+    colors[colorsused].radius = colordifference (px, map->palette[i].acolor);
+    colorsused++;
+  }
+
+  qsort (colors, colorsused, sizeof (colors[0]), compareradius);
+  assert (colorsused < 2 || colors[0].radius <= colors[1].radius);      // closest first
+
+  num_candidates = MIN (colorsused, num_candidates);
+
+  h.candidates_color =
+      mempool_alloc (m, num_candidates * sizeof (h.candidates_color[0]), 0);
+  h.candidates_index =
+      mempool_alloc (m, num_candidates * sizeof (h.candidates_index[0]), 0);
+  h.vantage_point = px;
+  h.num_candidates = num_candidates;
+
+  for (i = 0; i < num_candidates; i++) {
+    h.candidates_color[i] = map->palette[colors[i].index].acolor;
+    h.candidates_index[i] = colors[i].index;
+  }
+  // if all colors within this radius are included in candidates, then there cannot be any other better match
+  // farther away from the vantage point than half of the radius. Due to alpha channel must assume pessimistic radius.
+  h.radius = min_colordifference (px, h.candidates_color[num_candidates - 1]) / 4.0f;   // /4 = half of radius, but radius is squared
+
+  for (i = 0; i < num_candidates; i++) {
+    // divide again as that's matching certain subset within radius-limited subset
+    // - 1/256 is a tolerance for miscalculation (seems like colordifference isn't exact)
+    if (colors[i].radius < h.radius / 4.f - error_margin) {
+      skip_index[colors[i].index] = true;
+      (*skipped)++;
+    }
+  }
+  return h;
+}
+
+/* Returns the colormap whose entries serve as vantage points: the map's own
+ * subset_palette when it has one, otherwise a newly allocated colormap holding
+ * copies of the first (colors + 3) / 4 palette entries. In the second case
+ * the caller has to free the returned colormap. */
+static colormap *
+get_subset_palette (const colormap * map)
+{
+  colormap *copy;
+  unsigned int wanted, n;
+
+  if (map->subset_palette)
+    return map->subset_palette;
+
+  wanted = (map->colors + 3) / 4;
+  copy = pam_colormap (wanted, map->malloc, map->free);
+
+  n = 0;
+  while (n < wanted) {
+    copy->palette[n] = map->palette[n];
+    n++;
+  }
+
+  return copy;
+}
+
+/* Builds the nearest-colour search structure for `map`.
+ *
+ * map   colormap to search; must contain at least one colour. The returned
+ *       structure keeps a pointer to it.
+ * fast  selects fewer vantage points and no error margin, and lets the
+ *       fallback head omit colours already covered by earlier heads.
+ *
+ * Returns the new structure, which lives inside its own memory pool and is
+ * released with nearest_free(), or NULL when the pool cannot be allocated.
+ */
+LIQ_PRIVATE struct nearest_map *
+nearest_init (const colormap * map, bool fast)
+{
+  colormap *subset_palette = get_subset_palette (map);
+  const unsigned int num_vantage_points =
+      map->colors > 16 ? MIN (map->colors / (fast ? 4 : 3),
+      subset_palette->colors) : 0;
+  const unsigned long heads_size = sizeof (struct head) * (num_vantage_points + 1);     // +1 is fallback head
+
+  const unsigned long mempool_size =
+      (sizeof (f_pixel) +
+      sizeof (unsigned int)) * subset_palette->colors * map->colors / 5 +
+      (1 << 14);
+  mempool m = NULL;
+  struct nearest_map *centroids = mempool_create (&m,
+      sizeof (*centroids) + heads_size /* heads array is appended to it */ ,
+      mempool_size, map->malloc, map->free);
+  unsigned int skipped;
+  const float error_margin = fast ? 0 : 8.f / 256.f / 256.f;
+  unsigned int h, i, j;
+  bool *skip_index;
+
+  if (centroids == NULL) {
+    // allocation failed: release the temporary copy (if any) and report it
+    if (subset_palette != map->subset_palette) {
+      pam_freecolormap (subset_palette);
+    }
+    return NULL;
+  }
+
+  centroids->mempool = m;
+
+  for (i = 0; i < map->colors; i++) {
+    const float dist = distance_from_nearest_other_color (map, i);
+    centroids->nearest_other_color_dist[i] = dist / 4.f;        // half of squared distance
+  }
+
+  centroids->map = map;
+
+  skipped = 0;
+  assert (map->colors > 0);
+
+  skip_index = g_alloca (sizeof (bool) * map->colors);
+
+  for (j = 0; j < map->colors; j++)
+    skip_index[j] = false;
+
+  // floats and colordifference calculations are not perfect
+  for (h = 0; h < num_vantage_points; h++) {
+    // spread the colours that are still unclaimed over the remaining heads
+    unsigned int num_candidates =
+        1 + (map->colors - skipped) / ((1 + num_vantage_points - h) / 2);
+
+    centroids->heads[h] =
+        build_head (subset_palette->palette[h].acolor, map, num_candidates,
+        &centroids->mempool, error_margin, skip_index, &skipped);
+    if (centroids->heads[h].num_candidates == 0) {
+      // nothing left to claim: this slot is reused for the fallback head
+      break;
+    }
+  }
+
+  // assumption that there is no better color within radius of vantage point color
+  // holds true only for colors within convex hull formed by palette colors.
+  // The fallback must contain all colors, since there are too many edge cases to cover.
+  if (!fast)
+    for (j = 0; j < map->colors; j++) {
+      skip_index[j] = false;
+    }
+
+  centroids->heads[h] = build_head ((f_pixel) {
+      0, 0, 0, 0}
+      , map, map->colors, &centroids->mempool, error_margin,
+      skip_index, &skipped);
+  // MAX_DIFF radius makes the fallback head match every query
+  centroids->heads[h].radius = MAX_DIFF;
+
+  // get_subset_palette could have created a copy
+  if (subset_palette != map->subset_palette) {
+    pam_freecolormap (subset_palette);
+  }
+
+  return centroids;
+}
+
+/* Finds the palette index whose colour is closest to px.
+ *
+ * likely_colormap_index is tried first and returned straight away when px is
+ * closer to it than the precomputed nearest_other_color_dist of that entry.
+ * Otherwise the first head whose radius covers px is scanned for its best
+ * candidate. When px.a is above min_opaque_val, candidates with alpha below 1
+ * are penalised by 1/1024. If diff is not NULL it receives the
+ * colordifference of the returned match (penalty included).
+ */
+LIQ_PRIVATE unsigned int
+nearest_search (const struct nearest_map *centroids, const f_pixel px,
+    int likely_colormap_index, const float min_opaque_val, float *diff)
+{
+  const bool iebug = px.a > min_opaque_val;
+  const struct head *head;
+  float guess_diff, best_dist;
+  unsigned int best, j;
+
+  assert (likely_colormap_index < centroids->map->colors);
+
+  guess_diff =
+      colordifference (centroids->map->palette[likely_colormap_index].acolor,
+      px);
+  if (guess_diff < centroids->nearest_other_color_dist[likely_colormap_index]) {
+    if (diff)
+      *diff = guess_diff;
+    return likely_colormap_index;
+  }
+
+  /* pick the first head that covers px; last head will always be selected */
+  for (head = centroids->heads;; head++) {
+    const float vantage_point_dist = colordifference (px, head->vantage_point);
+
+    if (vantage_point_dist <= head->radius)
+      break;
+  }
+
+  assert (head->num_candidates);
+
+  best = 0;
+  best_dist = colordifference (px, head->candidates_color[0]);
+
+  /* penalty for making holes in IE */
+  if (iebug && head->candidates_color[0].a < 1) {
+    best_dist += 1.f / 1024.f;
+  }
+
+  for (j = 1; j < head->num_candidates; j++) {
+    float newdist = colordifference (px, head->candidates_color[j]);
+
+    /* penalty for making holes in IE */
+    if (iebug && head->candidates_color[j].a < 1) {
+      newdist += 1.f / 1024.f;
+    }
+
+    if (newdist < best_dist) {
+      best_dist = newdist;
+      best = j;
+    }
+  }
+
+  if (diff)
+    *diff = best_dist;
+  return head->candidates_index[best];
+}
+
+/* Releases a structure created by nearest_init(). The structure itself was
+ * allocated from its own pool, so destroying the pool frees it as well. */
+LIQ_PRIVATE void
+nearest_free (struct nearest_map *centroids)
+{
+  mempool pool = centroids->mempool;
+
+  mempool_destroy (pool);
+}
diff --git a/gst/dvbsubenc/libimagequant/nearest.h b/gst/dvbsubenc/libimagequant/nearest.h
new file mode 100644
index 000000000..9745d9592
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/nearest.h
@@ -0,0 +1,8 @@
+//
+//  nearest.h
+//  pngquant
+//
+// Opaque lookup structure used to find the palette entry closest to a pixel.
+struct nearest_map;
+// Builds a lookup structure for the given palette.
+// NOTE(review): the meaning of "fast" is defined in nearest.c - confirm there.
+LIQ_PRIVATE struct nearest_map *nearest_init(const colormap *palette, const bool fast);
+// Returns the palette index nearest to px. palette_index_guess is tried first;
+// when diff is non-NULL it receives the distance to the returned entry.
+LIQ_PRIVATE unsigned int nearest_search(const struct nearest_map *map, const f_pixel px, const int palette_index_guess, const float min_opaque, float *diff);
+// Releases a structure obtained from nearest_init().
+LIQ_PRIVATE void nearest_free(struct nearest_map *map);
diff --git a/gst/dvbsubenc/libimagequant/pam.c b/gst/dvbsubenc/libimagequant/pam.c
new file mode 100644
index 000000000..a3815a48b
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/pam.c
@@ -0,0 +1,322 @@
+/* pam.c - pam (portable alpha map) utility library
+**
+** Copyright (C) 1989, 1991 by Jef Poskanzer.
+** Copyright (C) 1997, 2000, 2002 by Greg Roelofs; based on an idea by
+**                                Stefan Schneider.
+** © 2009-2013 by Kornel Lesinski.
+**
+** Permission to use, copy, modify, and distribute this software and its
+** documentation for any purpose and without fee is hereby granted, provided
+** that the above copyright notice appear in all copies and that both that
+** copyright notice and this permission notice appear in supporting
+** documentation.  This software is provided "as is" without express or
+** implied warranty.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "mempool.h"
+
+/*
+ * Adds the pixels of a block of image rows to the colour hash table.
+ *
+ * acht            table to update; may already contain colours from earlier
+ *                 calls (acht->rows accumulates across calls)
+ * pixels          array of `rows` row pointers, each with `cols` pixels
+ * importance_map  optional, one byte per pixel consumed in scan order; each
+ *                 pixel is weighted by 0.5 + value/255 instead of 1.0
+ *
+ * All fully transparent pixels are folded into a single colour, every other
+ * pixel is posterized according to acht->ignorebits before being counted.
+ *
+ * Returns true on success. Returns false when growing a bucket would take the
+ * colour count above acht->maxcolors, or when a pool allocation fails; in the
+ * allocation-failure case acht->colors and acht->freestackp are not updated.
+ */
+/* *INDENT-OFF* */
+LIQ_PRIVATE bool
+pam_computeacolorhash (struct acolorhash_table *acht,
+    const rgba_pixel * const pixels[], unsigned int cols, unsigned int rows,
+    const unsigned char *importance_map)
+/* *INDENT-ON* */
+{
+  const unsigned int maxacolors = acht->maxcolors, ignorebits =
+      acht->ignorebits;
+  const unsigned int channel_mask = 255U >> ignorebits << ignorebits;
+  const unsigned int channel_hmask = (255U >> ignorebits) ^ 0xFFU;
+  const unsigned int posterize_mask =
+      channel_mask << 24 | channel_mask << 16 | channel_mask << 8 |
+      channel_mask;
+  const unsigned int posterize_high_mask =
+      channel_hmask << 24 | channel_hmask << 16 | channel_hmask << 8 |
+      channel_hmask;
+  struct acolorhist_arr_head *const buckets = acht->buckets;
+
+  unsigned int colors = acht->colors;
+  const unsigned int hash_size = acht->hash_size;
+
+  const unsigned int stacksize =
+      sizeof (acht->freestack) / sizeof (acht->freestack[0]);
+  struct acolorhist_arr_item **freestack = acht->freestack;
+  unsigned int freestackp = acht->freestackp;
+
+  /* Go through the entire image, building a hash table of colors. */
+  for (unsigned int row = 0; row < rows; ++row) {
+
+    float boost = 1.0;
+    for (unsigned int col = 0; col < cols; ++col) {
+      union rgba_as_int px = { pixels[row][col] };
+      unsigned int hash;
+      struct acolorhist_arr_head *achl;
+
+      if (importance_map) {
+        boost = 0.5f + (double) *importance_map++ / 255.f;
+      }
+      // RGBA color is cast to an integer for easier hashing/comparisons
+      if (!px.rgba.a) {
+        // "dirty alpha" has different RGBA values that end up being the same fully transparent color
+        px.l = 0;
+        hash = 0;
+      } else {
+        // mask posterizes all 4 channels in one go
+        px.l =
+            (px.l & posterize_mask) | ((px.l & posterize_high_mask) >> (8 -
+                ignorebits));
+        // fancier hashing algorithms didn't improve much
+        hash = px.l % hash_size;
+      }
+
+      /* head of the hash function stores first 2 colors inline (achl->used = 1..2),
+         to reduce number of allocations of achl->other_items.
+       */
+      achl = &buckets[hash];
+      if (achl->inline1.color.l == px.l && achl->used) {
+        achl->inline1.perceptual_weight += boost;
+        continue;
+      }
+      if (achl->used) {
+        if (achl->used > 1) {
+          struct acolorhist_arr_item *other_items;
+          unsigned int i = 0;
+          struct acolorhist_arr_item *new_items;
+          unsigned int capacity;
+
+          if (achl->inline2.color.l == px.l) {
+            achl->inline2.perceptual_weight += boost;
+            continue;
+          }
+          // other items are stored as an array (which gets reallocated if needed)
+          other_items = achl->other_items;
+          for (i = 0; i < achl->used - 2; i++) {
+            if (other_items[i].color.l == px.l) {
+              other_items[i].perceptual_weight += boost;
+              goto continue_outer_loop;
+            }
+          }
+
+          // the array was allocated with spare items
+          if (i < achl->capacity) {
+            other_items[i] = (struct acolorhist_arr_item) {
+            .color = px,.perceptual_weight = boost,};
+            achl->used++;
+            ++colors;
+            continue;
+          }
+
+          if (++colors > maxacolors) {
+            acht->colors = colors;
+            acht->freestackp = freestackp;
+            return false;
+          }
+
+          if (!other_items) {   // there was no array previously, alloc "small" array
+            capacity = 8;
+            if (freestackp <= 0) {
+              // estimate how many colors are going to be + headroom
+              const int mempool_size =
+                  ((acht->rows + rows - row) * 2 * colors / (acht->rows + row +
+                      1) + 1024) * sizeof (struct acolorhist_arr_item);
+              new_items =
+                  mempool_alloc (&acht->mempool,
+                  sizeof (struct acolorhist_arr_item) * capacity, mempool_size);
+              // the pool can run out of memory here just like in the growth path below
+              if (!new_items)
+                return false;
+            } else {
+              // freestack stores previously freed (reallocated) arrays that can be reused
+              // (all pessimistically assumed to be capacity = 8)
+              new_items = freestack[--freestackp];
+            }
+          } else {
+            // simply reallocs and copies array to larger capacity
+            capacity = achl->capacity * 2 + 16;
+            if (freestackp < stacksize - 1) {
+              freestack[freestackp++] = other_items;
+            }
+
+            {
+              const int mempool_size =
+                  ((acht->rows + rows - row) * 2 * colors / (acht->rows + row +
+                      1) + 32 * capacity) * sizeof (struct acolorhist_arr_item);
+              new_items =
+                  mempool_alloc (&acht->mempool,
+                  sizeof (struct acolorhist_arr_item) * capacity, mempool_size);
+            }
+            if (!new_items)
+              return false;
+            memcpy (new_items, other_items,
+                sizeof (other_items[0]) * achl->capacity);
+          }
+
+          achl->other_items = new_items;
+          achl->capacity = capacity;
+          new_items[i] = (struct acolorhist_arr_item) {
+          .color = px,.perceptual_weight = boost,};
+          achl->used++;
+        } else {
+          // these are elses for first checks whether first and second inline-stored colors are used
+          achl->inline2.color.l = px.l;
+          achl->inline2.perceptual_weight = boost;
+          achl->used = 2;
+          ++colors;
+        }
+      } else {
+        achl->inline1.color.l = px.l;
+        achl->inline1.perceptual_weight = boost;
+        achl->used = 1;
+        ++colors;
+      }
+
+    continue_outer_loop:;
+    }
+
+  }
+  acht->colors = colors;
+  acht->cols = cols;
+  acht->rows += rows;
+  acht->freestackp = freestackp;
+  return true;
+}
+
+/*
+ * Creates an empty colour hash table inside a freshly created memory pool.
+ *
+ * maxcolors   colour limit stored in the table
+ * surface     image area in pixels, only used to estimate the colour count
+ * ignorebits  number of low bits dropped per channel, stored in the table
+ * malloc/free allocator callbacks handed to mempool_create()
+ *
+ * The number of hash buckets is picked from three fixed sizes depending on
+ * the estimated colour count. Returns NULL if the pool cannot be created.
+ */
+LIQ_PRIVATE struct acolorhash_table *
+pam_allocacolorhash (unsigned int maxcolors, unsigned int surface,
+    unsigned int ignorebits, void *(*malloc) (size_t), void (*free) (void *))
+{
+  const unsigned int divisor = ignorebits + (surface > 512 * 512 ? 5 : 4);
+  unsigned int estimated_colors = surface / divisor;
+  unsigned int hash_size;
+  unsigned int buckets_size;
+  unsigned int mempool_size;
+  mempool pool = NULL;
+  struct acolorhash_table *table;
+
+  /* never expect more colours than the caller allows */
+  if (maxcolors < estimated_colors)
+    estimated_colors = maxcolors;
+
+  if (estimated_colors < 66000)
+    hash_size = 6673;
+  else if (estimated_colors < 200000)
+    hash_size = 12011;
+  else
+    hash_size = 24019;
+
+  buckets_size = hash_size * sizeof (struct acolorhist_arr_head);
+  mempool_size =
+      sizeof (struct acolorhash_table) + buckets_size +
+      estimated_colors * sizeof (struct acolorhist_arr_item);
+
+  /* the table header and its buckets are the first allocation of the pool */
+  table =
+      mempool_create (&pool, sizeof (*table) + buckets_size, mempool_size,
+      malloc, free);
+  if (!table)
+    return NULL;
+
+  *table = (struct acolorhash_table) {
+  .ignorebits = ignorebits,.maxcolors = maxcolors,.hash_size =
+        hash_size,.mempool = pool,};
+  memset (table->buckets, 0, hash_size * sizeof (struct acolorhist_arr_head));
+  return table;
+}
+
+/*
+ * Appends one hash table entry to the histogram being built.
+ * Relies on these names being in scope at the expansion site: hist, j,
+ * gamma_lut, total_weight and max_perceptual_weight. The entry's weight is
+ * capped at max_perceptual_weight, stored as both perceptual and adjusted
+ * weight, and added to total_weight; j is advanced to the next free slot.
+ * Wrapped in do/while(0) so that the expansion behaves as a single statement.
+ */
+#define PAM_ADD_TO_HIST(entry) do { \
+    hist->achv[j].acolor = to_f(gamma_lut, (entry).color.rgba); \
+    total_weight += hist->achv[j].adjusted_weight = hist->achv[j].perceptual_weight = MIN((entry).perceptual_weight, max_perceptual_weight); \
+    ++j; \
+} while (0)
+
+/*
+ * Flattens a colour hash table into a newly allocated histogram.
+ *
+ * acht         hash table to read (NULL yields NULL)
+ * gamma        gamma used to build the lookup table for to_f()
+ * malloc/free  allocator callbacks; free is also stored in the histogram so
+ *              that pam_freeacolorhist() can release it later
+ *
+ * Each entry's weight is capped at 1/10th of the image surface. Returns NULL
+ * when acht is NULL or an allocation fails; nothing is leaked in those cases.
+ */
+LIQ_PRIVATE histogram *
+pam_acolorhashtoacolorhist (const struct acolorhash_table * acht,
+    const double gamma, void *(*malloc) (size_t), void (*free) (void *))
+{
+  histogram *hist;
+  float gamma_lut[256];
+  float max_perceptual_weight;
+  double total_weight;
+  unsigned int i, j, k;
+
+  /* check the input before allocating so there is nothing to clean up */
+  if (!acht)
+    return NULL;
+
+  hist = malloc (sizeof (hist[0]));
+  if (!hist)
+    return NULL;
+  *hist = (histogram) {
+  .achv = malloc (acht->colors * sizeof (hist->achv[0])),.size =
+        acht->colors,.free = free,.ignorebits = acht->ignorebits,};
+  if (!hist->achv) {
+    /* do not leak the histogram header when the entry array is unavailable */
+    free (hist);
+    return NULL;
+  }
+
+  to_f_set_gamma (gamma_lut, gamma);
+
+  /* Limit perceptual weight to 1/10th of the image surface area to prevent
+     a single color from dominating all others. */
+  max_perceptual_weight = 0.1f * acht->cols * acht->rows;
+  total_weight = 0;
+
+  /* j is the next free histogram slot, advanced by PAM_ADD_TO_HIST */
+  for (j = 0, i = 0; i < acht->hash_size; ++i) {
+    const struct acolorhist_arr_head *const achl = &acht->buckets[i];
+    if (achl->used) {
+      PAM_ADD_TO_HIST (achl->inline1);
+
+      if (achl->used > 1) {
+        PAM_ADD_TO_HIST (achl->inline2);
+
+        /* entries beyond the two inline ones live in other_items */
+        for (k = 0; k < achl->used - 2; k++) {
+          PAM_ADD_TO_HIST (achl->other_items[k]);
+        }
+      }
+    }
+  }
+
+  hist->total_perceptual_weight = total_weight;
+  return hist;
+}
+
+
+/*
+ * Releases a colour hash table by destroying the memory pool it references.
+ */
+LIQ_PRIVATE void
+pam_freeacolorhash (struct acolorhash_table *acht)
+{
+  struct mempool *const pool = acht->mempool;
+
+  mempool_destroy (pool);
+}
+
+/*
+ * Releases a histogram: first its entry array, then the histogram itself,
+ * both through the free callback stored in the histogram.
+ */
+LIQ_PRIVATE void
+pam_freeacolorhist (histogram * hist)
+{
+  void (*const release) (void *) = hist->free;
+
+  release (hist->achv);
+  release (hist);
+}
+
+/*
+ * Allocates a colormap with room for `colors` palette entries.
+ *
+ * The palette is zero-filled, subset_palette is NULL and the allocator
+ * callbacks are remembered in the map. colors must be in 1..65535 (asserted).
+ * Returns NULL if the allocation fails.
+ */
+LIQ_PRIVATE colormap *
+pam_colormap (unsigned int colors, void *(*malloc) (size_t),
+    void (*free) (void *))
+{
+  const size_t palette_bytes = colors * sizeof (colormap_item);
+  colormap *result;
+
+  assert (colors > 0 && colors < 65536);
+
+  /* header and flexible palette array come from a single allocation */
+  result = malloc (sizeof (*result) + palette_bytes);
+  if (!result)
+    return NULL;
+
+  result->colors = colors;
+  result->malloc = malloc;
+  result->free = free;
+  result->subset_palette = NULL;
+  memset (result->palette, 0, palette_bytes);
+
+  return result;
+}
+
+/*
+ * Returns a deep copy of map: the palette entries and, recursively, the
+ * subset palette. The copy uses the same allocator callbacks as the original.
+ *
+ * Returns NULL if the copy cannot be allocated. If only the nested subset
+ * palette fails to allocate, the copy is returned with subset_palette NULL.
+ */
+LIQ_PRIVATE colormap *
+pam_duplicate_colormap (colormap * map)
+{
+  colormap *dupe = pam_colormap (map->colors, map->malloc, map->free);
+
+  /* pam_colormap() returns NULL on allocation failure */
+  if (!dupe)
+    return NULL;
+
+  for (unsigned int i = 0; i < map->colors; i++) {
+    dupe->palette[i] = map->palette[i];
+  }
+  if (map->subset_palette) {
+    dupe->subset_palette = pam_duplicate_colormap (map->subset_palette);
+  }
+  return dupe;
+}
+
+/*
+ * Frees a colormap. A nested subset palette, if any, is freed first; the map
+ * itself is then released through its own free callback.
+ */
+LIQ_PRIVATE void
+pam_freecolormap (colormap * c)
+{
+  colormap *const nested = c->subset_palette;
+
+  if (nested) {
+    pam_freecolormap (nested);
+  }
+
+  c->free (c);
+}
+
+/*
+ * Fills the 256-entry table gamma_lut so that entry i holds
+ * (i / 255) raised to the power internal_gamma / gamma.
+ */
+LIQ_PRIVATE void
+to_f_set_gamma (float gamma_lut[], const double gamma)
+{
+  const double exponent = internal_gamma / gamma;
+  int level = 0;
+
+  while (level < 256) {
+    gamma_lut[level] = pow ((double) level / 255.0, exponent);
+    level++;
+  }
+}
diff --git a/gst/dvbsubenc/libimagequant/pam.h b/gst/dvbsubenc/libimagequant/pam.h
new file mode 100644
index 000000000..ef82719dc
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/pam.h
@@ -0,0 +1,298 @@
+/* pam.h - pam (portable alpha map) utility library
+ **
+ ** Colormap routines.
+ **
+ ** Copyright (C) 1989, 1991 by Jef Poskanzer.
+ ** Copyright (C) 1997 by Greg Roelofs.
+ **
+ ** Permission to use, copy, modify, and distribute this software and its
+ ** documentation for any purpose and without fee is hereby granted, provided
+ ** that the above copyright notice appear in all copies and that both that
+ ** copyright notice and this permission notice appear in supporting
+ ** documentation.  This software is provided "as is" without express or
+ ** implied warranty.
+ */
+
+#ifndef PAM_H
+#define PAM_H
+
+#include <math.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Generic min/max helpers. Both arguments may be evaluated twice.
+ * NOTE(review): MIN is only defined here when MAX was not already defined. */
+#ifndef MAX
+#  define MAX(a,b)  ((a) > (b)? (a) : (b))
+#  define MIN(a,b)  ((a) < (b)? (a) : (b))
+#endif
+
+/* Large constant; presumably a "worse than any real difference" sentinel -
+ * confirm against its users. */
+#define MAX_DIFF 1e20
+
+/* Unless the build overrides USE_SSE, the SSE code path is only enabled when
+ * the compiler advertises SSE and a Windows target macro is defined. */
+#ifndef USE_SSE
+#  if defined(__SSE__) && (defined(WIN32) || defined(__WIN32__))
+#    define USE_SSE 1
+#  else
+#    define USE_SSE 0
+#  endif
+#endif
+
+/* SSE_ALIGN requests 16-byte alignment on non-MSVC compilers when SSE is in
+ * use and expands to nothing otherwise. The cpuid() helper has a variant for
+ * 32-bit PIC builds that saves and restores ebx around the instruction. */
+#if USE_SSE
+#  include <xmmintrin.h>
+#  ifdef _MSC_VER
+#    include <intrin.h>
+#    define SSE_ALIGN
+#  else
+#    define SSE_ALIGN __attribute__ ((aligned (16)))
+#    if defined(__i386__) && defined(__PIC__)
+#       define cpuid(func,ax,bx,cx,dx)\
+        __asm__ __volatile__ ( \
+        "push %%ebx\n" \
+        "cpuid\n" \
+        "mov %%ebx, %1\n" \
+        "pop %%ebx\n" \
+        : "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \
+        : "a" (func));
+#    else
+#       define cpuid(func,ax,bx,cx,dx)\
+        __asm__ __volatile__ ("cpuid":\
+        "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
+#    endif
+#endif
+#else
+#  define SSE_ALIGN
+#endif
+
+/* Compiler-specific spellings for forcing or forbidding inlining. The MSVC
+ * branch also maps the inline and restrict keywords to MSVC's own names. */
+#if defined(__GNUC__) || defined (__llvm__)
+#define ALWAYS_INLINE __attribute__((always_inline)) inline
+#define NEVER_INLINE __attribute__ ((noinline))
+#elif defined(_MSC_VER)
+#define inline __inline
+#define restrict __restrict
+#define ALWAYS_INLINE __forceinline
+#define NEVER_INLINE __declspec(noinline)
+#else
+#define ALWAYS_INLINE inline
+#define NEVER_INLINE
+#endif
+
+/* from pam.h */
+
+/* One pixel with 8 bits per channel, stored in r, g, b, a order. */
+typedef struct {
+    unsigned char r, g, b, a;
+} rgba_pixel;
+
+/* One pixel in floating point, stored in a, r, g, b order. to_f() produces
+ * values with the colour channels premultiplied by alpha. */
+typedef struct {
+    float a, r, g, b;
+} SSE_ALIGN f_pixel;
+
+/* Gamma of the internal float representation; to_f_set_gamma() and to_rgb()
+ * use it as a ratio against the image gamma. */
+static const double internal_gamma = 0.5499;
+
+/* Fills gamma_lut (256 entries) for converting 8-bit channels with to_f(). */
+LIQ_PRIVATE void to_f_set_gamma(float gamma_lut[], const double gamma);
+
+/**
+ Maps an 8-bit RGBA pixel to the internal float pixel format.
+ Alpha is scaled to 0..1; each colour channel is looked up in gamma_lut and
+ multiplied by that alpha (premultiplied alpha blends semitransparent colours
+ much better).
+ */
+ALWAYS_INLINE static f_pixel to_f(const float gamma_lut[], const rgba_pixel px);
+inline static f_pixel to_f(const float gamma_lut[], const rgba_pixel px)
+{
+    const float alpha = px.a/255.f;
+    f_pixel out;
+
+    out.a = alpha;
+    out.r = gamma_lut[px.r]*alpha;
+    out.g = gamma_lut[px.g]*alpha;
+    out.b = gamma_lut[px.b]*alpha;
+
+    return out;
+}
+
+/**
+ Converts an internal float pixel back to 8-bit RGBA.
+ Pixels with alpha below 1/256 become fully transparent black. Otherwise the
+ premultiplication is undone, the gamma is converted back and every channel is
+ scaled by 256 and clamped to 255.
+ */
+inline static rgba_pixel to_rgb(const float gamma, const f_pixel px)
+{
+    rgba_pixel out = {0,0,0,0};
+    float r, g, b, a;
+
+    if (px.a < 1.f/256.f) {
+        return out;
+    }
+
+    // undo premultiplied alpha
+    r = px.r / px.a;
+    g = px.g / px.a;
+    b = px.b / px.a;
+    a = px.a;
+
+    // back from internal gamma to the requested one
+    r = powf(r, gamma/internal_gamma);
+    g = powf(g, gamma/internal_gamma);
+    b = powf(b, gamma/internal_gamma);
+
+    // 256, because numbers are in range 1..255.9999… rounded down
+    r *= 256.f;
+    g *= 256.f;
+    b *= 256.f;
+    a *= 256.f;
+
+    out.r = r>=255.f ? 255 : r;
+    out.g = g>=255.f ? 255 : g;
+    out.b = b>=255.f ? 255 : b;
+    out.a = a>=255.f ? 255 : a;
+
+    return out;
+}
+
+/* Difference of one premultiplied channel: the squared difference when both
+   colours are blended on black plus the squared difference when blended on
+   white. alphas is the alpha difference (py.a - px.a) supplied by the caller. */
+ALWAYS_INLINE static double colordifference_ch(const double x, const double y, const double alphas);
+inline static double colordifference_ch(const double x, const double y, const double alphas)
+{
+    // premultiplied alpha and backgrounds 0/1 shorten the formula
+    const double on_black = x-y;
+    const double on_white = on_black+alphas;
+
+    return on_black*on_black + on_white*on_white;
+}
+
+/* Portable colour difference: sum of colordifference_ch() over r, g and b.
+
+   Derivation, with both pixels premultiplied by alpha:
+     blended on black:  rgb stays as it is, so the difference is the same as
+                        for opaque RGB
+     blended on white:  rgb becomes rgb + (1 - a), so the difference is
+                        (px.rgb - py.rgb) + (py.a - px.a)
+   which is why only the alpha difference has to be passed along. */
+ALWAYS_INLINE static float colordifference_stdc(const f_pixel px, const f_pixel py);
+inline static float colordifference_stdc(const f_pixel px, const f_pixel py)
+{
+    const double alpha_delta = py.a-px.a;
+    const double red = colordifference_ch(px.r, py.r, alpha_delta);
+    const double green = colordifference_ch(px.g, py.g, alpha_delta);
+    const double blue = colordifference_ch(px.b, py.b, alpha_delta);
+
+    return red + green + blue;
+}
+
+/* Per-channel lower bound of the difference: twice the smaller of the squared
+   on-black and on-white differences. */
+ALWAYS_INLINE static double min_colordifference_ch(const double x, const double y, const double alphas);
+inline static double min_colordifference_ch(const double x, const double y, const double alphas)
+{
+    const double on_black = x-y;
+    const double on_white = on_black+alphas;
+    const double sq_black = on_black*on_black;
+    const double sq_white = on_white*on_white;
+
+    return (sq_black < sq_white ? sq_black : sq_white) * 2.f;
+}
+
+/* Smallest difference two colours can have, whatever background they are
+   blended on: sum of min_colordifference_ch() over r, g and b. */
+ALWAYS_INLINE static float min_colordifference(const f_pixel px, const f_pixel py);
+inline static float min_colordifference(const f_pixel px, const f_pixel py)
+{
+    const double alpha_delta = py.a-px.a;
+    const double red = min_colordifference_ch(px.r, py.r, alpha_delta);
+    const double green = min_colordifference_ch(px.g, py.g, alpha_delta);
+    const double blue = min_colordifference_ch(px.b, py.b, alpha_delta);
+
+    return red + green + blue;
+}
+
+/* Colour difference between two premultiplied float pixels.
+   With USE_SSE the four channels are processed at once and the result is
+   asserted to agree with colordifference_stdc() within 0.001; without it the
+   call is forwarded to colordifference_stdc(). */
+ALWAYS_INLINE static float colordifference(f_pixel px, f_pixel py);
+inline static float colordifference(f_pixel px, f_pixel py)
+{
+#if USE_SSE
+    __m128 alphas, onblack, onwhite;
+    // NOTE(review): _mm_load_ps is the aligned load - relies on f_pixel being 16-byte aligned; confirm for builds where SSE_ALIGN is empty
+    const __m128 vpx = _mm_load_ps((const float*)&px);
+    const __m128 vpy = _mm_load_ps((const float*)&py);
+    float res;
+
+    // y.a - x.a
+    alphas = _mm_sub_ss(vpy, vpx);
+    alphas = _mm_shuffle_ps(alphas,alphas,0); // copy first to all four
+
+    onblack = _mm_sub_ps(vpx, vpy); // x - y
+    onwhite = _mm_add_ps(onblack, alphas); // x - y + (y.a - x.a)
+
+    // square both differences, lane by lane
+    onblack = _mm_mul_ps(onblack, onblack);
+    onwhite = _mm_mul_ps(onwhite, onwhite);
+
+    {
+      const __m128 max = _mm_add_ps(onwhite, onblack);
+
+      // add rgb, not a
+      const __m128 maxhl = _mm_movehl_ps(max, max);
+      const __m128 tmp = _mm_add_ps(max, maxhl);
+      const __m128 sum = _mm_add_ss(maxhl, _mm_shuffle_ps(tmp, tmp, 1));
+
+      res = _mm_cvtss_f32(sum);
+    }
+    // sanity check of the vector result against the portable implementation
+    assert(fabs(res - colordifference_stdc(px,py)) < 0.001);
+    return res;
+#else
+    return colordifference_stdc(px,py);
+#endif
+}
+
+/* from pamcmap.h */
+/* Lets an RGBA pixel be read and compared as a single unsigned integer. */
+union rgba_as_int {
+    rgba_pixel rgba;
+    unsigned int l;
+};
+
+/* One histogram entry: a colour and its weights. */
+typedef struct {
+    f_pixel acolor;
+    float adjusted_weight,   // perceptual weight changed to tweak how mediancut selects colors
+          perceptual_weight; // number of pixels weighted by importance of different areas of the picture
+
+    float color_weight;      // these two change every time histogram subset is sorted
+    union {
+        unsigned int sort_value;
+        unsigned char likely_colormap_index; // last palette match, reused as the guess for nearest_search()
+    } tmp;
+} hist_item;
+
+/* Flat list of the colours found in an image. */
+typedef struct {
+    hist_item *achv;                // array of `size` entries
+    void (*free)(void*);            // used by pam_freeacolorhist() for achv and the histogram itself
+    double total_perceptual_weight; // sum of the entries' capped perceptual weights
+    unsigned int size;
+    unsigned int ignorebits;        // copied from the hash table the histogram was built from
+} histogram;
+
+/* One palette entry. */
+typedef struct {
+    f_pixel acolor;
+    float popularity;
+    bool fixed; // if true it's user-supplied and must not be changed (e.g in voronoi iteration)
+} colormap_item;
+
+/* Palette of `colors` entries, allocated together with its entry array. */
+typedef struct colormap {
+    unsigned int colors;
+    void* (*malloc)(size_t);         // allocator the map was created with
+    void (*free)(void*);             // used by pam_freecolormap() to release the map
+    struct colormap *subset_palette; // optional nested palette, owned by this map
+    colormap_item palette[];
+} colormap;
+
+/* A colour and its accumulated weight, as stored in a bucket's overflow array. */
+struct acolorhist_arr_item {
+    union rgba_as_int color;
+    float perceptual_weight;
+};
+
+/* One hash bucket. `used` counts the colours in the bucket: the first two are
+   stored in inline1/inline2, any further ones in other_items, which has room
+   for `capacity` entries. */
+struct acolorhist_arr_head {
+    unsigned int used, capacity;
+    struct {
+        union rgba_as_int color;
+        float perceptual_weight;
+    } inline1, inline2;
+    struct acolorhist_arr_item *other_items;
+};
+
+/* Hash table of image colours, filled by pam_computeacolorhash(). */
+struct acolorhash_table {
+    struct mempool *mempool;                    // pool holding the table and its overflow arrays
+    unsigned int ignorebits, maxcolors, colors, cols, rows;
+    unsigned int hash_size;                     // number of entries in buckets[]
+    unsigned int freestackp;                    // number of entries currently on freestack
+    struct acolorhist_arr_item *freestack[512]; // outgrown overflow arrays kept for reuse
+    struct acolorhist_arr_head buckets[];
+};
+
+/* Destroys the memory pool of a hash table created by pam_allocacolorhash(). */
+LIQ_PRIVATE void pam_freeacolorhash(struct acolorhash_table *acht);
+/* Creates an empty hash table; returns NULL if the pool cannot be created. */
+LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, unsigned int surface, unsigned int ignorebits, void* (*malloc)(size_t), void (*free)(void*));
+/* Converts a hash table into a newly allocated histogram; returns NULL on failure. */
+LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table *acht, const double gamma, void* (*malloc)(size_t), void (*free)(void*));
+/* Adds rows of pixels to the hash table; returns false if the colour limit is exceeded or an allocation fails. */
+LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba_pixel *const pixels[], unsigned int cols, unsigned int rows, const unsigned char *importance_map);
+
+/* Frees a histogram returned by pam_acolorhashtoacolorhist(). */
+LIQ_PRIVATE void pam_freeacolorhist(histogram *h);
+
+/* Allocates a zero-filled palette; returns NULL if the allocation fails. */
+LIQ_PRIVATE colormap *pam_colormap(unsigned int colors, void* (*malloc)(size_t), void (*free)(void*));
+/* Returns a copy of map including its subset palette. */
+LIQ_PRIVATE colormap *pam_duplicate_colormap(colormap *map);
+/* Frees a palette together with its subset palette. */
+LIQ_PRIVATE void pam_freecolormap(colormap *c);
+
+#endif
diff --git a/gst/dvbsubenc/libimagequant/viter.c b/gst/dvbsubenc/libimagequant/viter.c
new file mode 100644
index 000000000..489f3657c
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/viter.c
@@ -0,0 +1,111 @@
+
+#include "libimagequant.h"
+#include "pam.h"
+#include "viter.h"
+#include "nearest.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Without OpenMP the code below runs single-threaded: one thread, number 0. */
+#ifdef _OPENMP
+#include <omp.h>
+#else
+#define omp_get_max_threads() 1
+#define omp_get_thread_num() 0
+#endif
+
+/*
+ * Voronoi iteration: new palette color is computed from weighted average of colors that map to that palette entry.
+ *
+ * viter_init() zeroes the accumulator array. Every thread owns
+ * VITER_CACHE_LINE_GAP + map->colors consecutive entries of average_color.
+ */
+LIQ_PRIVATE void
+viter_init (const colormap * map, const unsigned int max_threads,
+    viter_state average_color[])
+{
+  const size_t entries =
+      (VITER_CACHE_LINE_GAP + map->colors) * max_threads;
+
+  memset (average_color, 0, sizeof (average_color[0]) * entries);
+}
+
+/*
+ * Adds one colour, weighted by value, to the running sums kept for palette
+ * entry `match` in the accumulator slice that belongs to `thread`.
+ */
+LIQ_PRIVATE void
+viter_update_color (const f_pixel acolor, const float value,
+    const colormap * map, unsigned int match, const unsigned int thread,
+    viter_state average_color[])
+{
+  /* skip over the slices of the lower-numbered threads */
+  const unsigned int slot =
+      match + thread * (VITER_CACHE_LINE_GAP + map->colors);
+  viter_state *const sums = &average_color[slot];
+
+  sums->a += acolor.a * value;
+  sums->r += acolor.r * value;
+  sums->g += acolor.g * value;
+  sums->b += acolor.b * value;
+  sums->total += value;
+}
+
+/*
+ * Turns the accumulated sums into new palette colours.
+ *
+ * For every palette entry the per-thread sums are added up. An entry that
+ * received weight and is not fixed gets the weighted average as its colour
+ * and the total weight as popularity. Any other entry keeps its colour and
+ * gets the index-based value i / 1024 as popularity.
+ */
+LIQ_PRIVATE void
+viter_finalize (colormap * map, const unsigned int max_threads,
+    const viter_state average_color[])
+{
+  const unsigned int stride = VITER_CACHE_LINE_GAP + map->colors;
+  unsigned int i;
+
+  for (i = 0; i < map->colors; i++) {
+    colormap_item *const entry = &map->palette[i];
+    double sum_a = 0, sum_r = 0, sum_g = 0, sum_b = 0, weight = 0;
+    unsigned int t = 0;
+
+    // Aggregate results from all threads
+    while (t < max_threads) {
+      const viter_state *const part = &average_color[stride * t + i];
+
+      sum_a += part->a;
+      sum_r += part->r;
+      sum_g += part->g;
+      sum_b += part->b;
+      weight += part->total;
+      t++;
+    }
+
+    if (weight == 0 || entry->fixed) {
+      weight = i / 1024.0;
+    } else {
+      entry->acolor.a = sum_a / weight;
+      entry->acolor.r = sum_r / weight;
+      entry->acolor.g = sum_g / weight;
+      entry->acolor.b = sum_b / weight;
+    }
+    entry->popularity = weight;
+  }
+}
+
+/*
+ * Runs one Voronoi iteration over the histogram.
+ *
+ * Every histogram entry is matched to its nearest palette colour, the match
+ * is remembered in the entry as the guess for the next search, and the entry
+ * is accumulated into the average for that palette colour. Afterwards the
+ * palette is updated from the averages by viter_finalize().
+ *
+ * callback, when non-NULL, is invoked for every entry with its difference
+ * from the matched colour.
+ *
+ * Returns the sum of (difference * perceptual weight) over all entries,
+ * divided by hist->total_perceptual_weight.
+ */
+LIQ_PRIVATE double
+viter_do_iteration (histogram * hist, colormap * const map,
+    const float min_opaque_val, viter_callback callback,
+    const bool fast_palette)
+{
+  viter_state *average_color;
+  const unsigned int max_threads = omp_get_max_threads ();
+  double total_diff = 0;
+
+  /* Accumulators live on the stack, one slice per thread.
+     NOTE(review): g_alloca is a GLib macro and no GLib header is included
+     directly in this file - presumably it arrives via libimagequant.h. */
+  average_color =
+      g_alloca (sizeof (viter_state) * (VITER_CACHE_LINE_GAP +
+          map->colors) * max_threads);
+
+  viter_init (map, max_threads, average_color);
+  {
+    struct nearest_map *const n = nearest_init (map, fast_palette);
+    hist_item *const achv = hist->achv;
+    const int hist_size = hist->size;
+    int j;
+
+#pragma omp parallel for if (hist_size > 3000) \
+            schedule(static) default(none) shared(average_color,callback) reduction(+:total_diff)
+    for (j = 0; j < hist_size; j++) {
+      float diff;
+      unsigned int match =
+          nearest_search (n, achv[j].acolor, achv[j].tmp.likely_colormap_index,
+          min_opaque_val, &diff);
+      /* remembered as the guess for the next iteration; stored in an unsigned char */
+      achv[j].tmp.likely_colormap_index = match;
+      total_diff += diff * achv[j].perceptual_weight;
+
+      viter_update_color (achv[j].acolor, achv[j].perceptual_weight, map, match,
+          omp_get_thread_num (), average_color);
+
+      if (callback)
+        callback (&achv[j], diff);
+    }
+
+    nearest_free (n);
+  }
+  viter_finalize (map, max_threads, average_color);
+
+  return total_diff / hist->total_perceptual_weight;
+}
diff --git a/gst/dvbsubenc/libimagequant/viter.h b/gst/dvbsubenc/libimagequant/viter.h
new file mode 100644
index 000000000..c1a5b391a
--- /dev/null
+++ b/gst/dvbsubenc/libimagequant/viter.h
@@ -0,0 +1,19 @@
+
+#ifndef VITER_H
+#define VITER_H
+
+// Spread memory touched by different threads at least 64B apart which I assume is the cache line size. This should avoid memory write contention.
+// The value is the number of viter_state entries needed to cover 64 bytes, rounded up.
+#define VITER_CACHE_LINE_GAP ((64+sizeof(viter_state)-1)/sizeof(viter_state))
+
+// Running weighted sums of the colours matched to one palette entry; total is the sum of the weights.
+typedef struct {
+    double a, r, g, b, total;
+} viter_state;
+
+// Called by viter_do_iteration() for every histogram entry with its difference from the matched palette colour.
+typedef void (*viter_callback)(hist_item *item, float diff);
+
+// Zeroes the accumulators for max_threads threads.
+LIQ_PRIVATE void viter_init(const colormap *map, const unsigned int max_threads, viter_state state[]);
+// Adds acolor, weighted by value, to the sums of palette entry `match` in the slice of `thread`.
+LIQ_PRIVATE void viter_update_color(const f_pixel acolor, const float value, const colormap *map, unsigned int match, const unsigned int thread, viter_state average_color[]);
+// Combines the per-thread sums and updates the colour and popularity of the palette entries.
+LIQ_PRIVATE void viter_finalize(colormap *map, const unsigned int max_threads, const viter_state state[]);
+// Runs one complete iteration and returns the weighted average difference.
+LIQ_PRIVATE double viter_do_iteration(histogram *hist, colormap *const map, const float min_opaque_val, viter_callback callback, const bool fast_palette);
+
+#endif
diff --git a/gst/dvbsubenc/meson.build b/gst/dvbsubenc/meson.build
new file mode 100644
index 000000000..5f37550cc
--- /dev/null
+++ b/gst/dvbsubenc/meson.build
@@ -0,0 +1,30 @@
+# Sources of the DVB subtitle encoder element itself
+subenc_sources = [
+  'gstdvbsubenc.c',
+  'gstdvbsubenc-util.c',
+]
+
+# Bundled copy of libimagequant, compiled directly into the plugin
+libimagequant_sources = [
+  'libimagequant/blur.c',
+  'libimagequant/libimagequant.c',
+  'libimagequant/mediancut.c',
+  'libimagequant/mempool.c',
+  'libimagequant/nearest.c',
+  'libimagequant/pam.c',
+  'libimagequant/viter.c'
+]
+
+# LIQ_PRIVATE is defined to nothing for the bundled sources
+local_c_args = ['-DLIQ_PRIVATE='] # GStreamer already has the right hidden symbol visibility
+# The bundled sources contain OpenMP pragmas; silence the warning about them
+# when the compiler supports the option
+if cc.has_argument ('-Wno-unknown-pragmas')
+  local_c_args += ['-Wno-unknown-pragmas']
+endif
+
+# Build the plugin, generate its pkg-config file and register it in the plugin list
+gstdvbsubenc = library('gstdvbsubenc',
+  subenc_sources + libimagequant_sources,
+  c_args : gst_plugins_bad_args + local_c_args,
+  include_directories : [configinc, libsinc],
+  dependencies : [gstbase_dep, gstvideo_dep, libm],
+  install : true,
+  install_dir : plugins_install_dir,
+)
+pkgconfig.generate(gstdvbsubenc, install_dir : plugins_pkgconfig_install_dir)
+plugins += [gstdvbsubenc]
author	Jan Schmidt <jan@centricular.com>	2020-04-07 21:47:22 +1000
committer	Jan Schmidt <jan@centricular.com>	2020-06-17 12:50:13 +1000
commit	1cf3cae5e1e35c2e8eb8a919db77f2970e743676 (patch)
tree	fc0411d02fc3ad55378912f49b85fb6406db99f0 /gst/dvbsubenc
parent	f899728dd45bbf154123bca3c1c8c9e2b6af5c24 (diff)
download	gstreamer-plugins-bad-1cf3cae5e1e35c2e8eb8a919db77f2970e743676.tar.gz