Merge remote-tracking branch 'qatar/master' into master

* qatar/master: (27 commits) ac3enc: fix LOCAL_ALIGNED usage in count_mantissa_bits() ac3dsp: do not use the ff_* prefix when referencing ff_ac3_bap_bits. ac3dsp: fix loop condition in ac3_update_bap_counts_c() ARM: unbreak build ac3enc: modify mantissa bit counting to keep bap counts for all values of bap instead of just 0 to 4. ac3enc: split mantissa bit counting into a separate function. ac3enc: store per-block/channel bap pointers by reference block in a 2D array rather than in the AC3Block struct. get_bits: add av_unused tag to cache variable sws: replace all long with int. ARM: aacdec: fix constraints on inline asm ARM: remove unnecessary volatile from inline asm ARM: add "cc" clobbers to inline asm where needed ARM: improve FASTDIV asm ac3enc: use LOCAL_ALIGNED macro APIchanges: fill in git hash for av_get_pix_fmt_name (0420bd7). lavu: add av_get_pix_fmt_name() convenience function cmdutils: remove OPT_FUNC2 swscale: fix crash in bilinear scaling. vpxenc: add VP8E_SET_STATIC_THRESHOLD mapping webm: support stereo videos in matroska/webm muxer ... Conflicts: Changelog cmdutils.c cmdutils.h doc/APIchanges doc/muxers.texi ffmpeg.c ffplay.c libavcodec/ac3enc.c libavcodec/ac3enc_float.c libavcodec/avcodec.h libavcodec/get_bits.h libavcodec/libvpxenc.c libavcodec/version.h libavdevice/libdc1394.c libavformat/matroskaenc.c libavutil/avutil.h libswscale/rgb2rgb.c libswscale/swscale.c libswscale/swscale_template.c libswscale/x86/swscale_template.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
author: Michael Niedermayer <michaelni@gmx.at> 2011-05-29 02:55:19 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-05-29 03:34:35 +0200
commit: b8a43bc1b50f409414493a05f6c4b7895ca4ddf9 (patch)
tree: 95dda1b7289aac9bdb1f457417baf9515aa4383a
parent: 39d607e5bbc25ad9629683702b510e865434ef21 (diff)
parent: 90da52f01f8b6c22af22a002eb226989b1cf7ef8 (diff)
download: ffmpeg-b8a43bc1b50f409414493a05f6c4b7895ca4ddf9.tar.gz
45 files changed, 1337 insertions, 915 deletions
diff --git a/Changelog b/Changelog
index 37ccd06106..a2018ddb7d 100644
--- a/Changelog
+++ b/Changelog
@@ -9,6 +9,7 @@ version <next>:
 - mpeg2 aspect ratio dection fixed
 - libxvid aspect pickiness fixed
 - Frame multithreaded decoding
+- E-AC-3 audio encoder
 - Lots of deprecated API cruft removed
 - fft and imdct optimizations for AVX (Sandy Bridge) processors
 - showinfo filter added
diff --git a/configure b/configure
index 2c4f2e81e1..0e57e3949c 100755
--- a/configure
+++ b/configure
@@ -1268,6 +1268,7 @@ dca_decoder_select="mdct"
 dnxhd_encoder_select="aandct"
 dxa_decoder_select="zlib"
 eac3_decoder_select="ac3_decoder"
+eac3_encoder_select="mdct ac3dsp"
 eamad_decoder_select="aandct"
 eatgq_decoder_select="aandct"
 eatqi_decoder_select="aandct"
diff --git a/doc/APIchanges b/doc/APIchanges
index d853cfecbd..f88b7af7c1 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -30,6 +30,10 @@ API changes, most recent first:
 2011-05-XX - XXXXXX - lavfi 2.6.0 - avcodec.h
   Add avfilter_get_video_buffer_ref_from_frame() to libavfilter/avcodec.h.
 
+2011-05-28 - 0420bd7 - lavu 51.2.0 - pixdesc.h
+  Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
+  avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
+
 2011-05-25 - 30315a8 - lavf 53.1.0 - avformat.h
   Add fps_probe_size to AVFormatContext.
 
diff --git a/doc/general.texi b/doc/general.texi
index f965e05b8f..e5e76db056 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -617,7 +617,7 @@ following image formats are supported:
     @tab Used in Origin's Wing Commander IV AVI files.
 @item DSP Group TrueSpeech   @tab     @tab  X
 @item DV audio               @tab     @tab  X
-@item Enhanced AC-3          @tab     @tab  X
+@item Enhanced AC-3          @tab  X  @tab  X
 @item FLAC (Free Lossless Audio Codec)  @tab  X  @tab  IX
 @item GSM                    @tab  E  @tab  X
     @tab encoding supported through external library libgsm
diff --git a/ffmpeg.c b/ffmpeg.c
index 8659b1f5ba..dc435aba2a 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -4395,10 +4395,10 @@ static const OptionDef options[] = {
     { "copyinkf", OPT_BOOL | OPT_EXPERT, {(void*)&copy_initial_nonkeyframes}, "copy initial non-keyframes" },
 
     /* video options */
-    { "b",  HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
-    { "vb",  HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "b", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "vb", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
     { "vframes", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&max_frames[AVMEDIA_TYPE_VIDEO]}, "set the number of video frames to record", "number" },
-    { "r",  HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
+    { "r", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
     { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" },
     { "aspect", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_aspect_ratio}, "set aspect ratio (4:3, 16:9 or 1.3333, 1.7777)", "aspect" },
     { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format, 'list' as argument shows all the pixel formats supported", "format" },
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index c6920adba0..95d08eaaa8 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -127,6 +127,7 @@ OBJS-$(CONFIG_DVVIDEO_DECODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DXA_DECODER)             += dxa.o
 OBJS-$(CONFIG_EAC3_DECODER)            += eac3dec.o eac3dec_data.o
+OBJS-$(CONFIG_EAC3_ENCODER)            += ac3enc_float.o ac3tab.o ac3.o kbdwin.o
 OBJS-$(CONFIG_EACMV_DECODER)           += eacmv.o
 OBJS-$(CONFIG_EAMAD_DECODER)           += eamad.o eaidct.o mpeg12.o \
                                           mpeg12data.o mpegvideo.o  \
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 4ec0f2a19c..619addc3d5 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -128,24 +128,33 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
     } while (end > ff_ac3_band_start_tab[band++]);
 }
 
-static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap,
-                                       int nb_coefs)
+static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
+                                    int len)
 {
-    int bits, b, i;
+    while (len-- > 0)
+        mant_cnt[bap[len]]++;
+}
 
-    bits = 0;
-    for (i = 0; i < nb_coefs; i++) {
-        b = bap[i];
-        if (b <= 4) {
-            // bap=1 to bap=4 will be counted in compute_mantissa_size_final
-            mant_cnt[b]++;
-        } else if (b <= 13) {
-            // bap=5 to bap=13 use (bap-1) bits
-            bits += b - 1;
-        } else {
-            // bap=14 uses 14 bits and bap=15 uses 16 bits
-            bits += (b == 14) ? 14 : 16;
-        }
+DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
+    0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
+};
+
+static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
+{
+    int blk, bap;
+    int bits = 0;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        // bap=1 : 3 mantissas in 5 bits
+        bits += (mant_cnt[blk][1] / 3) * 5;
+        // bap=2 : 3 mantissas in 7 bits
+        // bap=4 : 2 mantissas in 7 bits
+        bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
+        // bap=3 : 1 mantissa in 3 bits
+        bits += mant_cnt[blk][3] * 3;
+        // bap=5 to 15 : get bits per mantissa from table
+        for (bap = 5; bap < 16; bap++)
+            bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
     }
     return bits;
 }
@@ -181,6 +190,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
     c->ac3_rshift_int32 = ac3_rshift_int32_c;
     c->float_to_fixed24 = float_to_fixed24_c;
     c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
+    c->update_bap_counts = ac3_update_bap_counts_c;
     c->compute_mantissa_size = ac3_compute_mantissa_size_c;
     c->extract_exponents = ac3_extract_exponents_c;
 
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index 878c802187..d7131d2c03 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -24,6 +24,12 @@
 
 #include <stdint.h>
 
+/**
+ * Number of mantissa bits written for each bap value.
+ * bap values with fractional bits are set to 0 and are calculated separately.
+ */
+extern const uint16_t ff_ac3_bap_bits[16];
+
 typedef struct AC3DSPContext {
     /**
      * Set each encoded exponent in a block to the minimum of itself and the
@@ -102,9 +108,21 @@ typedef struct AC3DSPContext {
                                const uint8_t *bap_tab, uint8_t *bap);
 
     /**
+     * Update bap counts using the supplied array of bap.
+     *
+     * @param[out] mant_cnt   bap counts for 1 block
+     * @param[in]  bap        array of bap, pointing to start coef bin
+     * @param[in]  len        number of elements to process
+     */
+    void (*update_bap_counts)(uint16_t mant_cnt[16], uint8_t *bap, int len);
+
+    /**
      * Calculate the number of bits needed to encode a set of mantissas.
+     *
+     * @param[in] mant_cnt    bap counts for all blocks
+     * @return                mantissa bit count
      */
-    int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs);
+    int (*compute_mantissa_size)(uint16_t mant_cnt[6][16]);
 
     void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
 } AC3DSPContext;
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index e30425bd4e..2835eb3e45 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -75,7 +75,6 @@ typedef struct AC3MDCTContext {
  * Data for a single audio block.
  */
 typedef struct AC3Block {
-    uint8_t  **bap;                             ///< bit allocation pointers (bap)
     CoefType **mdct_coef;                       ///< MDCT coefficients
     int32_t  **fixed_coef;                      ///< fixed-point MDCT coefficients
     uint8_t  **exp;                             ///< original exponents
@@ -90,7 +89,6 @@ typedef struct AC3Block {
     uint8_t  new_rematrixing_strategy;          ///< send new rematrixing flags in this block
     int      num_rematrixing_bands;             ///< number of rematrixing bands
     uint8_t  rematrixing_flags[4];              ///< rematrixing flags
-    struct AC3Block *exp_ref_block[AC3_MAX_CHANNELS]; ///< reference blocks for EXP_REUSE
     int      new_cpl_strategy;                  ///< send new coupling strategy
     int      cpl_in_use;                        ///< coupling in use for this block     (cplinu)
     uint8_t  channel_in_cpl[AC3_MAX_CHANNELS];  ///< channel in coupling                (chincpl)
@@ -115,6 +113,7 @@ typedef struct AC3EncodeContext {
 
     AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
 
+    int eac3;                               ///< indicates if this is E-AC-3 vs. AC-3
     int bitstream_id;                       ///< bitstream id                           (bsid)
     int bitstream_mode;                     ///< bitstream mode                         (bsmod)
 
@@ -125,8 +124,8 @@ typedef struct AC3EncodeContext {
     int frame_size;                         ///< current frame size in bytes
     int frame_size_code;                    ///< frame size code                        (frmsizecod)
     uint16_t crc_inv[2];
-    int bits_written;                       ///< bit count    (used to avg. bitrate)
-    int samples_written;                    ///< sample count (used to avg. bitrate)
+    int64_t bits_written;                   ///< bit count    (used to avg. bitrate)
+    int64_t samples_written;                ///< sample count (used to avg. bitrate)
 
     int fbw_channels;                       ///< number of full-bandwidth channels      (nfchans)
     int channels;                           ///< total number of channels               (nchans)
@@ -186,6 +185,9 @@ typedef struct AC3EncodeContext {
     uint8_t *cpl_coord_mant_buffer;
 
     uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
+    uint8_t exp_ref_block[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< reference blocks for EXP_REUSE
+    uint8_t *ref_bap     [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap)
+    int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
 
     DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
 } AC3EncodeContext;
@@ -215,65 +217,25 @@ static const float extmixlev_options[EXTMIXLEV_NUM_OPTIONS] = {
 #define OFFSET(param) offsetof(AC3EncodeContext, options.param)
 #define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 
-#if CONFIG_AC3ENC_FLOAT || !CONFIG_AC3_FLOAT_ENCODER //we need this exactly once compiled in
-const AVOption ff_ac3_options[] = {
-/* Metadata Options */
-{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
-/* downmix levels */
-{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM},
-{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM},
-/* audio production information */
-{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM},
-{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"},
-    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-    {"large",        "Large Room",              0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-    {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
-/* other metadata options */
-{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
-{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM},
-{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"},
-    {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-    {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-    {"off",          "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
-{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT,   {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
-/* extended bitstream information */
-{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"},
-    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-    {"ltrt", "Lt/Rt Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-    {"loro", "Lo/Ro Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
-{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
-{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"},
-    {"notindicated", "Not Indicated (default)",       0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-    {"on",           "Dolby Surround EX Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-    {"off",          "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
-{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"},
-    {"notindicated", "Not Indicated (default)",     0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-    {"on",           "Dolby Headphone Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-    {"off",          "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
-{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
-    {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
-    {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
-/* Other Encoding Options */
-{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
-#if CONFIG_AC3ENC_FLOAT
-{"channel_coupling",   "Channel Coupling",   OFFSET(channel_coupling),   FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
-    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
-{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
-    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
-#endif
-{NULL}
-};
-#endif
+#define AC3ENC_TYPE_AC3_FIXED   0
+#define AC3ENC_TYPE_AC3         1
+#define AC3ENC_TYPE_EAC3        2
 
 #if CONFIG_AC3ENC_FLOAT
+#define AC3ENC_TYPE AC3ENC_TYPE_AC3
+#include "ac3enc_opts_template.c"
 static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
-                                ff_ac3_options, LIBAVUTIL_VERSION_INT };
+                                ac3_options, LIBAVUTIL_VERSION_INT };
+#undef AC3ENC_TYPE
+#define AC3ENC_TYPE AC3ENC_TYPE_EAC3
+#include "ac3enc_opts_template.c"
+static AVClass eac3enc_class = { "E-AC-3 Encoder", av_default_item_name,
+                                 eac3_options, LIBAVUTIL_VERSION_INT };
 #else
+#define AC3ENC_TYPE AC3ENC_TYPE_AC3_FIXED
+#include "ac3enc_opts_template.c"
 static AVClass ac3enc_class = { "Fixed-Point AC-3 Encoder", av_default_item_name,
-                                ff_ac3_options, LIBAVUTIL_VERSION_INT };
+                                ac3fixed_options, LIBAVUTIL_VERSION_INT };
 #endif
 
 
@@ -402,7 +364,7 @@ static const int8_t ac3_coupling_start_tab[6][3][19] = {
 
 /**
  * Adjust the frame size to make the average bit rate match the target bit rate.
- * This is only needed for 11025, 22050, and 44100 sample rates.
+ * This is only needed for 11025, 22050, and 44100 sample rates or any E-AC-3.
  */
 static void adjust_frame_size(AC3EncodeContext *s)
 {
@@ -555,12 +517,15 @@ static inline float calc_cpl_coord(float energy_ch, float energy_cpl)
 static void apply_channel_coupling(AC3EncodeContext *s)
 {
 #if CONFIG_AC3ENC_FLOAT
-    DECLARE_ALIGNED(16, float,   cpl_coords)      [AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
-    DECLARE_ALIGNED(16, int32_t, fixed_cpl_coords)[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
+    LOCAL_ALIGNED_16(float,   cpl_coords,       [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+    LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
     int blk, ch, bnd, i, j;
     CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
     int num_cpl_coefs = s->num_cpl_subbands * 12;
 
+    memset(cpl_coords,       0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
+    memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*fixed_cpl_coords));
+
     /* calculate coupling channel from fbw channels */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
@@ -749,6 +714,35 @@ static void apply_channel_coupling(AC3EncodeContext *s)
             }
         }
     }
+
+    if (s->eac3) {
+        /* set first cpl coords */
+        int first_cpl_coords[AC3_MAX_CHANNELS];
+        for (ch = 1; ch <= s->fbw_channels; ch++)
+            first_cpl_coords[ch] = 1;
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            for (ch = 1; ch <= s->fbw_channels; ch++) {
+                if (block->channel_in_cpl[ch]) {
+                    if (first_cpl_coords[ch]) {
+                        block->new_cpl_coords = 2;
+                        first_cpl_coords[ch]  = 0;
+                    }
+                } else {
+                    first_cpl_coords[ch] = 1;
+                }
+            }
+        }
+
+        /* set first cpl leak */
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            if (block->cpl_in_use) {
+                block->new_cpl_leak = 2;
+                break;
+            }
+        }
+    }
 #endif /* CONFIG_AC3ENC_FLOAT */
 }
 
@@ -1050,10 +1044,10 @@ static void encode_exponents(AC3EncodeContext *s)
             blk1 = blk + 1;
 
             /* count the number of EXP_REUSE blocks after the current block
-               and set exponent reference block pointers */
-            block->exp_ref_block[ch] = block;
+               and set exponent reference block numbers */
+            s->exp_ref_block[ch][blk] = blk;
             while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
-                s->blocks[blk1].exp_ref_block[ch] = block;
+                s->exp_ref_block[ch][blk1] = blk;
                 blk1++;
             }
             num_reuse_blocks = blk1 - blk - 1;
@@ -1068,6 +1062,9 @@ static void encode_exponents(AC3EncodeContext *s)
             blk = blk1;
         }
     }
+
+    /* reference block numbers have been changed, so reset ref_bap_set */
+    s->ref_bap_set = 0;
 }
 
 
@@ -1166,38 +1163,72 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
      *   no delta bit allocation
      *   no skipped data
      *   no auxilliary data
+     *   no E-AC-3 metadata
      */
 
     /* header */
-    frame_bits = 65;
-    frame_bits += frame_bits_inc[s->channel_mode];
+    frame_bits = 16; /* sync info */
+    if (s->eac3) {
+        /* bitstream info header */
+        frame_bits += 35;
+        frame_bits += 1 + 1 + 1;
+        /* audio frame header */
+        frame_bits += 2;
+        frame_bits += 10;
+        /* exponent strategy */
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            frame_bits += 2 * s->fbw_channels + s->lfe_on;
+        /* converter exponent strategy */
+        frame_bits += s->fbw_channels * 5;
+        /* snr offsets */
+        frame_bits += 10;
+        /* block start info */
+        frame_bits++;
+    } else {
+        frame_bits += 49;
+        frame_bits += frame_bits_inc[s->channel_mode];
+    }
 
     /* audio blocks */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        /* block switch flags */
-        frame_bits += s->fbw_channels;
+        if (!s->eac3) {
+            /* block switch flags */
+            frame_bits += s->fbw_channels;
 
-        /* dither flags */
-        frame_bits += s->fbw_channels;
+            /* dither flags */
+            frame_bits += s->fbw_channels;
+        }
 
         /* dynamic range */
         frame_bits++;
 
-        /* exponent strategy */
-        frame_bits += 2 * s->fbw_channels;
-        if (s->lfe_on)
+        /* spectral extension */
+        if (s->eac3)
             frame_bits++;
 
-        /* bit allocation params */
-        frame_bits++;
-        if (!blk)
-            frame_bits += 2 + 2 + 2 + 2 + 3;
+        if (!s->eac3) {
+            /* exponent strategy */
+            frame_bits += 2 * s->fbw_channels;
+            if (s->lfe_on)
+                frame_bits++;
 
-        /* delta bit allocation */
-        frame_bits++;
+            /* bit allocation params */
+            frame_bits++;
+            if (!blk)
+                frame_bits += 2 + 2 + 2 + 2 + 3;
+        }
 
-        /* skipped data */
-        frame_bits++;
+        /* converter snr offset */
+        if (s->eac3)
+            frame_bits++;
+
+        if (!s->eac3) {
+            /* delta bit allocation */
+            frame_bits++;
+
+            /* skipped data */
+            frame_bits++;
+        }
     }
 
     /* auxiliary data */
@@ -1222,7 +1253,7 @@ static void bit_alloc_init(AC3EncodeContext *s)
     s->slow_decay_code = 2;
     s->fast_decay_code = 1;
     s->slow_gain_code  = 1;
-    s->db_per_bit_code = 3;
+    s->db_per_bit_code = s->eac3 ? 2 : 3;
     s->floor_code      = 7;
     for (ch = 0; ch <= s->channels; ch++)
         s->fast_gain_code[ch] = 4;
@@ -1257,13 +1288,29 @@ static void count_frame_bits(AC3EncodeContext *s)
     int frame_bits = 0;
 
     /* header */
-    if (opt->audio_production_info)
-        frame_bits += 7;
-    if (s->bitstream_id == 6) {
-        if (opt->extended_bsi_1)
-            frame_bits += 14;
-        if (opt->extended_bsi_2)
-            frame_bits += 14;
+    if (s->eac3) {
+        /* coupling */
+        if (s->channel_mode > AC3_CHMODE_MONO) {
+            frame_bits++;
+            for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+                AC3Block *block = &s->blocks[blk];
+                frame_bits++;
+                if (block->new_cpl_strategy)
+                    frame_bits++;
+            }
+        }
+        /* coupling exponent strategy */
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            frame_bits += 2 * s->blocks[blk].cpl_in_use;
+    } else {
+        if (opt->audio_production_info)
+            frame_bits += 7;
+        if (s->bitstream_id == 6) {
+            if (opt->extended_bsi_1)
+                frame_bits += 14;
+            if (opt->extended_bsi_2)
+                frame_bits += 14;
+        }
     }
 
     /* audio blocks */
@@ -1271,15 +1318,23 @@ static void count_frame_bits(AC3EncodeContext *s)
         AC3Block *block = &s->blocks[blk];
 
         /* coupling strategy */
-        frame_bits++;
-        if (block->new_cpl_strategy) {
+        if (!s->eac3)
             frame_bits++;
+        if (block->new_cpl_strategy) {
+            if (!s->eac3)
+                frame_bits++;
             if (block->cpl_in_use) {
-                frame_bits += s->fbw_channels;
+                if (s->eac3)
+                    frame_bits++;
+                if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO)
+                    frame_bits += s->fbw_channels;
                 if (s->channel_mode == AC3_CHMODE_STEREO)
                     frame_bits++;
                 frame_bits += 4 + 4;
-                frame_bits += s->num_cpl_subbands - 1;
+                if (s->eac3)
+                    frame_bits++;
+                else
+                    frame_bits += s->num_cpl_subbands - 1;
             }
         }
 
@@ -1287,7 +1342,8 @@ static void count_frame_bits(AC3EncodeContext *s)
         if (block->cpl_in_use) {
             for (ch = 1; ch <= s->fbw_channels; ch++) {
                 if (block->channel_in_cpl[ch]) {
-                    frame_bits++;
+                    if (!s->eac3 || block->new_cpl_coords != 2)
+                        frame_bits++;
                     if (block->new_cpl_coords) {
                         frame_bits += 2;
                         frame_bits += (4 + 4) * s->num_cpl_bands;
@@ -1298,7 +1354,8 @@ static void count_frame_bits(AC3EncodeContext *s)
 
         /* stereo rematrixing */
         if (s->channel_mode == AC3_CHMODE_STEREO) {
-            frame_bits++;
+            if (!s->eac3 || blk > 0)
+                frame_bits++;
             if (s->blocks[blk].new_rematrixing_strategy)
                 frame_bits += block->num_rematrixing_bands;
         }
@@ -1313,17 +1370,20 @@ static void count_frame_bits(AC3EncodeContext *s)
         }
 
         /* coupling exponent strategy */
-        if (block->cpl_in_use)
+        if (!s->eac3 && block->cpl_in_use)
             frame_bits += 2;
 
         /* snr offsets and fast gain codes */
-        frame_bits++;
-        if (block->new_snr_offsets)
-            frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3);
+        if (!s->eac3) {
+            frame_bits++;
+            if (block->new_snr_offsets)
+                frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3);
+        }
 
         /* coupling leak info */
         if (block->cpl_in_use) {
-            frame_bits++;
+            if (!s->eac3 || block->new_cpl_leak != 2)
+                frame_bits++;
             if (block->new_cpl_leak)
                 frame_bits += 3 + 3;
         }
@@ -1334,22 +1394,6 @@ static void count_frame_bits(AC3EncodeContext *s)
 
 
 /**
- * Finalize the mantissa bit count by adding in the grouped mantissas.
- */
-static int compute_mantissa_size_final(int mant_cnt[5])
-{
-    // bap=1 : 3 mantissas in 5 bits
-    int bits = (mant_cnt[1] / 3) * 5;
-    // bap=2 : 3 mantissas in 7 bits
-    // bap=4 : 2 mantissas in 7 bits
-    bits += ((mant_cnt[2] / 3) + (mant_cnt[4] >> 1)) * 7;
-    // bap=3 : each mantissa is 3 bits
-    bits += mant_cnt[3] * 3;
-    return bits;
-}
-
-
-/**
  * Calculate masking curve based on the final exponents.
  * Also calculate the power spectral densities to use in future calculations.
  */
@@ -1386,18 +1430,79 @@ static void bit_alloc_masking(AC3EncodeContext *s)
 static void reset_block_bap(AC3EncodeContext *s)
 {
     int blk, ch;
-    int channels = s->channels + 1;
-    if (s->blocks[0].bap[0] == s->bap_buffer)
+    uint8_t *ref_bap;
+
+    if (s->ref_bap[0][0] == s->bap_buffer && s->ref_bap_set)
         return;
+
+    ref_bap = s->bap_buffer;
+    for (ch = 0; ch <= s->channels; ch++) {
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            s->ref_bap[ch][blk] = ref_bap + AC3_MAX_COEFS * s->exp_ref_block[ch][blk];
+        ref_bap += AC3_MAX_COEFS * AC3_MAX_BLOCKS;
+    }
+    s->ref_bap_set = 1;
+}
+
+
+/**
+ * Initialize mantissa counts.
+ * These are set so that they are padded to the next whole group size when bits
+ * are counted in compute_mantissa_size.
+ */
+static void count_mantissa_bits_init(uint16_t mant_cnt[AC3_MAX_BLOCKS][16])
+{
+    int blk;
+
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        for (ch = 0; ch < channels; ch++) {
-            s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * channels + ch)];
-        }
+        memset(mant_cnt[blk], 0, sizeof(mant_cnt[blk]));
+        mant_cnt[blk][1] = mant_cnt[blk][2] = 2;
+        mant_cnt[blk][4] = 1;
+    }
+}
+
+
+/**
+ * Update mantissa bit counts for all blocks in 1 channel in a given bandwidth
+ * range.
+ */
+static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
+                                          uint16_t mant_cnt[AC3_MAX_BLOCKS][16],
+                                          int start, int end)
+{
+    int blk;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        if (ch == CPL_CH && !block->cpl_in_use)
+            continue;
+        s->ac3dsp.update_bap_counts(mant_cnt[blk],
+                                    s->ref_bap[ch][blk] + start,
+                                    FFMIN(end, block->end_freq[ch]) - start);
     }
 }
 
 
 /**
+ * Count the number of mantissa bits in the frame based on the bap values.
+ */
+static int count_mantissa_bits(AC3EncodeContext *s)
+{
+    int ch, max_end_freq;
+    LOCAL_ALIGNED_16(uint16_t, mant_cnt, [AC3_MAX_BLOCKS], [16]);
+
+    count_mantissa_bits_init(mant_cnt);
+
+    max_end_freq = s->bandwidth_code * 3 + 73;
+    for (ch = !s->cpl_enabled; ch <= s->channels; ch++)
+        count_mantissa_bits_update_ch(s, ch, mant_cnt, s->start_freq[ch],
+                                      max_end_freq);
+
+    return s->ac3dsp.compute_mantissa_size(mant_cnt);
+}
+
+
+/**
  * Run the bit allocation with a given SNR offset.
  * This calculates the bit allocation pointers that will be used to determine
  * the quantization of each mantissa.
@@ -1407,51 +1512,27 @@ static void reset_block_bap(AC3EncodeContext *s)
 static int bit_alloc(AC3EncodeContext *s, int snr_offset)
 {
     int blk, ch;
-    int mantissa_bits;
-    int mant_cnt[5];
 
     snr_offset = (snr_offset - 240) << 2;
 
     reset_block_bap(s);
-    mantissa_bits = 0;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        AC3Block *ref_block;
-        int av_uninit(ch0);
-        int got_cpl = !block->cpl_in_use;
-        // initialize grouped mantissa counts. these are set so that they are
-        // padded to the next whole group size when bits are counted in
-        // compute_mantissa_size_final
-        mant_cnt[0] = mant_cnt[3] = 0;
-        mant_cnt[1] = mant_cnt[2] = 2;
-        mant_cnt[4] = 1;
-        for (ch = 1; ch <= s->channels; ch++) {
-            if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
-                ch0     = ch - 1;
-                ch      = CPL_CH;
-                got_cpl = 1;
-            }
 
+        for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
             /* Currently the only bit allocation parameters which vary across
                blocks within a frame are the exponent values.  We can take
                advantage of that by reusing the bit allocation pointers
                whenever we reuse exponents. */
-            ref_block = block->exp_ref_block[ch];
             if (s->exp_strategy[ch][blk] != EXP_REUSE) {
-                s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch], ref_block->psd[ch],
+                s->ac3dsp.bit_alloc_calc_bap(block->mask[ch], block->psd[ch],
                                              s->start_freq[ch], block->end_freq[ch],
                                              snr_offset, s->bit_alloc.floor,
-                                             ff_ac3_bap_tab, ref_block->bap[ch]);
+                                             ff_ac3_bap_tab, s->ref_bap[ch][blk]);
             }
-            mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
-                                                             ref_block->bap[ch]+s->start_freq[ch],
-                                                             block->end_freq[ch]-s->start_freq[ch]);
-            if (ch == CPL_CH)
-                ch = ch0;
         }
-        mantissa_bits += compute_mantissa_size_final(mant_cnt);
     }
-    return mantissa_bits;
+    return count_mantissa_bits(s);
 }
 
 
@@ -1726,7 +1807,6 @@ static void quantize_mantissas(AC3EncodeContext *s)
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        AC3Block *ref_block;
         AC3Mant m = { 0 };
 
         got_cpl = !block->cpl_in_use;
@@ -1736,10 +1816,9 @@ static void quantize_mantissas(AC3EncodeContext *s)
                 ch      = CPL_CH;
                 got_cpl = 1;
             }
-            ref_block = block->exp_ref_block[ch];
             quantize_mantissas_blk_ch(&m, block->fixed_coef[ch],
-                                      ref_block->exp[ch],
-                                      ref_block->bap[ch], block->qmant[ch],
+                                      s->blocks[s->exp_ref_block[ch][blk]].exp[ch],
+                                      s->ref_bap[ch][blk], block->qmant[ch],
                                       s->start_freq[ch], block->end_freq[ch]);
             if (ch == CPL_CH)
                 ch = ch0;
@@ -1751,7 +1830,7 @@ static void quantize_mantissas(AC3EncodeContext *s)
 /**
  * Write the AC-3 frame header to the output bitstream.
  */
-static void output_frame_header(AC3EncodeContext *s)
+static void ac3_output_frame_header(AC3EncodeContext *s)
 {
     AC3EncOptions *opt = &s->options;
 
@@ -1805,6 +1884,79 @@ static void output_frame_header(AC3EncodeContext *s)
 
 
 /**
+ * Write the E-AC-3 frame header to the output bitstream.
+ */
+static void eac3_output_frame_header(AC3EncodeContext *s)
+{
+    int blk, ch;
+    AC3EncOptions *opt = &s->options;
+
+    put_bits(&s->pb, 16, 0x0b77);                   /* sync word */
+
+    /* BSI header */
+    put_bits(&s->pb,  2, 0);                        /* stream type = independent */
+    put_bits(&s->pb,  3, 0);                        /* substream id = 0 */
+    put_bits(&s->pb, 11, (s->frame_size / 2) - 1);  /* frame size */
+    if (s->bit_alloc.sr_shift) {
+        put_bits(&s->pb, 2, 0x3);                   /* fscod2 */
+        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
+    } else {
+        put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
+        put_bits(&s->pb, 2, 0x3);                   /* number of blocks = 6 */
+    }
+    put_bits(&s->pb, 3, s->channel_mode);           /* audio coding mode */
+    put_bits(&s->pb, 1, s->lfe_on);                 /* LFE channel indicator */
+    put_bits(&s->pb, 5, s->bitstream_id);           /* bitstream id (EAC3=16) */
+    put_bits(&s->pb, 5, -opt->dialogue_level);      /* dialogue normalization level */
+    put_bits(&s->pb, 1, 0);                         /* no compression gain */
+    put_bits(&s->pb, 1, 0);                         /* no mixing metadata */
+    /* TODO: mixing metadata */
+    put_bits(&s->pb, 1, 0);                         /* no info metadata */
+    /* TODO: info metadata */
+    put_bits(&s->pb, 1, 0);                         /* no additional bit stream info */
+
+    /* frame header */
+    put_bits(&s->pb, 1, 1);                         /* exponent strategy syntax = each block */
+    put_bits(&s->pb, 1, 0);                         /* aht enabled = no */
+    put_bits(&s->pb, 2, 0);                         /* snr offset strategy = 1 */
+    put_bits(&s->pb, 1, 0);                         /* transient pre-noise processing enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* block switch syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* dither flag syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* bit allocation model syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* fast gain codes enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* dba syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* skip field syntax enabled = no */
+    put_bits(&s->pb, 1, 0);                         /* spx enabled = no */
+    /* coupling strategy use flags */
+    if (s->channel_mode > AC3_CHMODE_MONO) {
+        put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
+        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+            AC3Block *block = &s->blocks[blk];
+            put_bits(&s->pb, 1, block->new_cpl_strategy);
+            if (block->new_cpl_strategy)
+                put_bits(&s->pb, 1, block->cpl_in_use);
+        }
+    }
+    /* exponent strategy */
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
+            put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
+    if (s->lfe_on) {
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+            put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+    }
+    /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) */
+    for (ch = 1; ch <= s->fbw_channels; ch++)
+        put_bits(&s->pb, 5, 0);
+    /* snr offsets */
+    put_bits(&s->pb, 6, s->coarse_snr_offset);
+    put_bits(&s->pb, 4, s->fine_snr_offset[1]);
+    /* block start info */
+    put_bits(&s->pb, 1, 0);
+}
+
+
+/**
  * Write one audio block to the output bitstream.
  */
 static void output_audio_block(AC3EncodeContext *s, int blk)
@@ -1814,32 +1966,51 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     AC3Block *block = &s->blocks[blk];
 
     /* block switching */
-    for (ch = 0; ch < s->fbw_channels; ch++)
-        put_bits(&s->pb, 1, 0);
+    if (!s->eac3) {
+        for (ch = 0; ch < s->fbw_channels; ch++)
+            put_bits(&s->pb, 1, 0);
+    }
 
     /* dither flags */
-    for (ch = 0; ch < s->fbw_channels; ch++)
-        put_bits(&s->pb, 1, 1);
+    if (!s->eac3) {
+        for (ch = 0; ch < s->fbw_channels; ch++)
+            put_bits(&s->pb, 1, 1);
+    }
 
     /* dynamic range codes */
     put_bits(&s->pb, 1, 0);
 
+    /* spectral extension */
+    if (s->eac3)
+        put_bits(&s->pb, 1, 0);
+
     /* channel coupling */
-    put_bits(&s->pb, 1, block->new_cpl_strategy);
+    if (!s->eac3)
+        put_bits(&s->pb, 1, block->new_cpl_strategy);
     if (block->new_cpl_strategy) {
-        put_bits(&s->pb, 1, block->cpl_in_use);
+        if (!s->eac3)
+            put_bits(&s->pb, 1, block->cpl_in_use);
         if (block->cpl_in_use) {
             int start_sub, end_sub;
-            for (ch = 1; ch <= s->fbw_channels; ch++)
-                put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
+            if (s->eac3)
+                put_bits(&s->pb, 1, 0); /* enhanced coupling */
+            if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO) {
+                for (ch = 1; ch <= s->fbw_channels; ch++)
+                    put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
+            }
             if (s->channel_mode == AC3_CHMODE_STEREO)
                 put_bits(&s->pb, 1, 0); /* phase flags in use */
             start_sub = (s->start_freq[CPL_CH] - 37) / 12;
             end_sub   = (s->cpl_end_freq       - 37) / 12;
             put_bits(&s->pb, 4, start_sub);
             put_bits(&s->pb, 4, end_sub - 3);
-            for (bnd = start_sub+1; bnd < end_sub; bnd++)
-                put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
+            /* coupling band structure */
+            if (s->eac3) {
+                put_bits(&s->pb, 1, 0); /* use default */
+            } else {
+                for (bnd = start_sub+1; bnd < end_sub; bnd++)
+                    put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
+            }
         }
     }
 
@@ -1847,7 +2018,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     if (block->cpl_in_use) {
         for (ch = 1; ch <= s->fbw_channels; ch++) {
             if (block->channel_in_cpl[ch]) {
-                put_bits(&s->pb, 1, block->new_cpl_coords);
+                if (!s->eac3 || block->new_cpl_coords != 2)
+                    put_bits(&s->pb, 1, block->new_cpl_coords);
                 if (block->new_cpl_coords) {
                     put_bits(&s->pb, 2, block->cpl_master_exp[ch]);
                     for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
@@ -1861,7 +2033,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
 
     /* stereo rematrixing */
     if (s->channel_mode == AC3_CHMODE_STEREO) {
-        put_bits(&s->pb, 1, block->new_rematrixing_strategy);
+        if (!s->eac3 || blk > 0)
+            put_bits(&s->pb, 1, block->new_rematrixing_strategy);
         if (block->new_rematrixing_strategy) {
             /* rematrixing flags */
             for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++)
@@ -1870,10 +2043,12 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     }
 
     /* exponent strategy */
-    for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
-        put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
-    if (s->lfe_on)
-        put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+    if (!s->eac3) {
+        for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
+            put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
+        if (s->lfe_on)
+            put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+    }
 
     /* bandwidth */
     for (ch = 1; ch <= s->fbw_channels; ch++) {
@@ -1903,53 +2078,60 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
     }
 
     /* bit allocation info */
-    baie = (blk == 0);
-    put_bits(&s->pb, 1, baie);
-    if (baie) {
-        put_bits(&s->pb, 2, s->slow_decay_code);
-        put_bits(&s->pb, 2, s->fast_decay_code);
-        put_bits(&s->pb, 2, s->slow_gain_code);
-        put_bits(&s->pb, 2, s->db_per_bit_code);
-        put_bits(&s->pb, 3, s->floor_code);
+    if (!s->eac3) {
+        baie = (blk == 0);
+        put_bits(&s->pb, 1, baie);
+        if (baie) {
+            put_bits(&s->pb, 2, s->slow_decay_code);
+            put_bits(&s->pb, 2, s->fast_decay_code);
+            put_bits(&s->pb, 2, s->slow_gain_code);
+            put_bits(&s->pb, 2, s->db_per_bit_code);
+            put_bits(&s->pb, 3, s->floor_code);
+        }
     }
 
     /* snr offset */
-    put_bits(&s->pb, 1, block->new_snr_offsets);
-    if (block->new_snr_offsets) {
-        put_bits(&s->pb, 6, s->coarse_snr_offset);
-        for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
-            put_bits(&s->pb, 4, s->fine_snr_offset[ch]);
-            put_bits(&s->pb, 3, s->fast_gain_code[ch]);
+    if (!s->eac3) {
+        put_bits(&s->pb, 1, block->new_snr_offsets);
+        if (block->new_snr_offsets) {
+            put_bits(&s->pb, 6, s->coarse_snr_offset);
+            for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
+                put_bits(&s->pb, 4, s->fine_snr_offset[ch]);
+                put_bits(&s->pb, 3, s->fast_gain_code[ch]);
+            }
         }
+    } else {
+        put_bits(&s->pb, 1, 0); /* no converter snr offset */
     }
 
     /* coupling leak */
     if (block->cpl_in_use) {
-        put_bits(&s->pb, 1, block->new_cpl_leak);
+        if (!s->eac3 || block->new_cpl_leak != 2)
+            put_bits(&s->pb, 1, block->new_cpl_leak);
         if (block->new_cpl_leak) {
             put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak);
             put_bits(&s->pb, 3, s->bit_alloc.cpl_slow_leak);
         }
     }
 
-    put_bits(&s->pb, 1, 0); /* no delta bit allocation */
-    put_bits(&s->pb, 1, 0); /* no data to skip */
+    if (!s->eac3) {
+        put_bits(&s->pb, 1, 0); /* no delta bit allocation */
+        put_bits(&s->pb, 1, 0); /* no data to skip */
+    }
 
     /* mantissas */
     got_cpl = !block->cpl_in_use;
     for (ch = 1; ch <= s->channels; ch++) {
         int b, q;
-        AC3Block *ref_block;
 
         if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
             ch0     = ch - 1;
             ch      = CPL_CH;
             got_cpl = 1;
         }
-        ref_block = block->exp_ref_block[ch];
         for (i = s->start_freq[ch]; i < block->end_freq[ch]; i++) {
             q = block->qmant[ch][i];
-            b = ref_block->bap[ch][i];
+            b = s->ref_bap[ch][blk][i];
             switch (b) {
             case 0:                                         break;
             case 1: if (q != 128) put_bits(&s->pb,   5, q); break;
@@ -2022,6 +2204,10 @@ static void output_frame_end(AC3EncodeContext *s)
     if (pad_bytes > 0)
         memset(put_bits_ptr(&s->pb), 0, pad_bytes);
 
+    if (s->eac3) {
+        /* compute crc2 */
+        crc2_partial = av_crc(crc_ctx, 0, frame + 2, s->frame_size - 5);
+    } else {
     /* compute crc1 */
     /* this is not so easy because it is at the beginning of the data... */
     crc1    = av_bswap16(av_crc(crc_ctx, 0, frame + 4, frame_size_58 - 4));
@@ -2032,6 +2218,7 @@ static void output_frame_end(AC3EncodeContext *s)
     /* compute crc2 */
     crc2_partial = av_crc(crc_ctx, 0, frame + frame_size_58,
                           s->frame_size - frame_size_58 - 3);
+    }
     crc2 = av_crc(crc_ctx, crc2_partial, frame + s->frame_size - 3, 1);
     /* ensure crc2 does not match sync word by flipping crcrsv bit if needed */
     if (crc2 == 0x770B) {
@@ -2052,7 +2239,10 @@ static void output_frame(AC3EncodeContext *s, unsigned char *frame)
 
     init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE);
 
-    output_frame_header(s);
+    if (s->eac3)
+        eac3_output_frame_header(s);
+    else
+        ac3_output_frame_header(s);
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
         output_audio_block(s, blk);
@@ -2069,10 +2259,11 @@ static void dprint_options(AVCodecContext *avctx)
     char strbuf[32];
 
     switch (s->bitstream_id) {
-    case  6:  av_strlcpy(strbuf, "AC-3 (alt syntax)", 32);      break;
-    case  8:  av_strlcpy(strbuf, "AC-3 (standard)", 32);        break;
-    case  9:  av_strlcpy(strbuf, "AC-3 (dnet half-rate)", 32);  break;
-    case 10:  av_strlcpy(strbuf, "AC-3 (dnet quater-rate", 32); break;
+    case  6:  av_strlcpy(strbuf, "AC-3 (alt syntax)",       32); break;
+    case  8:  av_strlcpy(strbuf, "AC-3 (standard)",         32); break;
+    case  9:  av_strlcpy(strbuf, "AC-3 (dnet half-rate)",   32); break;
+    case 10:  av_strlcpy(strbuf, "AC-3 (dnet quater-rate)", 32); break;
+    case 16:  av_strlcpy(strbuf, "E-AC-3 (enhanced)",       32); break;
     default: snprintf(strbuf, 32, "ERROR");
     }
     av_dlog(avctx, "bitstream_id: %s (%d)\n", strbuf, s->bitstream_id);
@@ -2333,13 +2524,13 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
     const SampleType *samples = data;
     int ret;
 
-    if (s->options.allow_per_frame_metadata) {
+    if (!s->eac3 && s->options.allow_per_frame_metadata) {
         ret = validate_metadata(avctx);
         if (ret)
             return ret;
     }
 
-    if (s->bit_alloc.sr_code == 1)
+    if (s->bit_alloc.sr_code == 1 || s->eac3)
         adjust_frame_size(s);
 
     deinterleave_input_samples(s, samples);
@@ -2397,7 +2588,6 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx)
     av_freep(&s->qmant_buffer);
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        av_freep(&block->bap);
         av_freep(&block->mdct_coef);
         av_freep(&block->fixed_coef);
         av_freep(&block->exp);
@@ -2465,7 +2655,7 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels,
 
 static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
 {
-    int i, ret;
+    int i, ret, max_sr;
 
     /* validate channel layout */
     if (!avctx->channel_layout) {
@@ -2480,30 +2670,72 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
     }
 
     /* validate sample rate */
-    for (i = 0; i < 9; i++) {
-        if ((ff_ac3_sample_rate_tab[i / 3] >> (i % 3)) == avctx->sample_rate)
+    /* note: max_sr could be changed from 2 to 5 for E-AC-3 once we find a
+             decoder that supports half sample rate so we can validate that
+             the generated files are correct. */
+    max_sr = s->eac3 ? 2 : 8;
+    for (i = 0; i <= max_sr; i++) {
+        if ((ff_ac3_sample_rate_tab[i % 3] >> (i / 3)) == avctx->sample_rate)
             break;
     }
-    if (i == 9) {
+    if (i > max_sr) {
         av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
         return AVERROR(EINVAL);
     }
     s->sample_rate        = avctx->sample_rate;
-    s->bit_alloc.sr_shift = i % 3;
-    s->bit_alloc.sr_code  = i / 3;
-    s->bitstream_id       = 8 + s->bit_alloc.sr_shift;
+    s->bit_alloc.sr_shift = i / 3;
+    s->bit_alloc.sr_code  = i % 3;
+    s->bitstream_id       = s->eac3 ? 16 : 8 + s->bit_alloc.sr_shift;
 
     /* validate bit rate */
-    for (i = 0; i < 19; i++) {
-        if ((ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift)*1000 == avctx->bit_rate)
-            break;
-    }
-    if (i == 19) {
-        av_log(avctx, AV_LOG_ERROR, "invalid bit rate\n");
-        return AVERROR(EINVAL);
+    if (s->eac3) {
+        int max_br, min_br, wpf, min_br_dist, min_br_code;
+
+        /* calculate min/max bitrate */
+        max_br = 2048 * s->sample_rate / AC3_FRAME_SIZE * 16;
+        min_br = ((s->sample_rate + (AC3_FRAME_SIZE-1)) / AC3_FRAME_SIZE) * 16;
+        if (avctx->bit_rate < min_br || avctx->bit_rate > max_br) {
+            av_log(avctx, AV_LOG_ERROR, "invalid bit rate. must be %d to %d "
+                   "for this sample rate\n", min_br, max_br);
+            return AVERROR(EINVAL);
+        }
+
+        /* calculate words-per-frame for the selected bitrate */
+        wpf = (avctx->bit_rate / 16) * AC3_FRAME_SIZE / s->sample_rate;
+        av_assert1(wpf > 0 && wpf <= 2048);
+
+        /* find the closest AC-3 bitrate code to the selected bitrate.
+           this is needed for lookup tables for bandwidth and coupling
+           parameter selection */
+        min_br_code = -1;
+        min_br_dist = INT_MAX;
+        for (i = 0; i < 19; i++) {
+            int br_dist = abs(ff_ac3_bitrate_tab[i] * 1000 - avctx->bit_rate);
+            if (br_dist < min_br_dist) {
+                min_br_dist = br_dist;
+                min_br_code = i;
+            }
+        }
+
+        /* make sure the minimum frame size is below the average frame size */
+        s->frame_size_code = min_br_code << 1;
+        while (wpf > 1 && wpf * s->sample_rate / AC3_FRAME_SIZE * 16 > avctx->bit_rate)
+            wpf--;
+        s->frame_size_min = 2 * wpf;
+    } else {
+        for (i = 0; i < 19; i++) {
+            if ((ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift)*1000 == avctx->bit_rate)
+                break;
+        }
+        if (i == 19) {
+            av_log(avctx, AV_LOG_ERROR, "invalid bit rate\n");
+            return AVERROR(EINVAL);
+        }
+        s->frame_size_code = i << 1;
+        s->frame_size_min  = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
     }
-    s->bit_rate        = avctx->bit_rate;
-    s->frame_size_code = i << 1;
+    s->bit_rate   = avctx->bit_rate;
+    s->frame_size = s->frame_size_min;
 
     /* validate cutoff */
     if (avctx->cutoff < 0) {
@@ -2526,9 +2758,11 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
         return AVERROR(EINVAL);
     }
 
-    ret = validate_metadata(avctx);
-    if (ret)
-        return ret;
+    if (!s->eac3) {
+        ret = validate_metadata(avctx);
+        if (ret)
+            return ret;
+    }
 
     s->rematrixing_enabled = s->options.stereo_rematrixing &&
                              (s->channel_mode == AC3_CHMODE_STEREO);
@@ -2652,8 +2886,6 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
     }
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        FF_ALLOC_OR_GOTO(avctx, block->bap, channels * sizeof(*block->bap),
-                         alloc_fail);
         FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef),
                           alloc_fail);
         FF_ALLOCZ_OR_GOTO(avctx, block->exp, channels * sizeof(*block->exp),
@@ -2677,7 +2909,6 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
 
         for (ch = 0; ch < channels; ch++) {
             /* arrangement: block, channel, coeff */
-            block->bap[ch]         = &s->bap_buffer        [AC3_MAX_COEFS * (blk * channels + ch)];
             block->grouped_exp[ch] = &s->grouped_exp_buffer[128           * (blk * channels + ch)];
             block->psd[ch]         = &s->psd_buffer        [AC3_MAX_COEFS * (blk * channels + ch)];
             block->band_psd[ch]    = &s->band_psd_buffer   [64            * (blk * channels + ch)];
@@ -2728,6 +2959,8 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
     AC3EncodeContext *s = avctx->priv_data;
     int ret, frame_size_58;
 
+    s->eac3 = avctx->codec_id == CODEC_ID_EAC3;
+
     avctx->frame_size = AC3_FRAME_SIZE;
 
     ff_ac3_common_init();
@@ -2740,10 +2973,8 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
     if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)
         s->bitstream_mode = 0x7;
 
-    s->frame_size_min  = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
     s->bits_written    = 0;
     s->samples_written = 0;
-    s->frame_size      = s->frame_size_min;
 
     /* calculate crc_inv for both possible frame sizes */
     frame_size_58 = (( s->frame_size    >> 2) + ( s->frame_size    >> 4)) << 1;
diff --git a/libavcodec/ac3enc_combined.c b/libavcodec/ac3enc_combined.c
index 3d6b3d9335..db832f1f03 100644
--- a/libavcodec/ac3enc_combined.c
+++ b/libavcodec/ac3enc_combined.c
@@ -11,8 +11,18 @@ typedef struct CombineContext{
     AVCodec *codec;
 }CombineContext;
 
+#define OFFSET(param) offsetof(CombineContext, options.param)
+#define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+
+#define AC3ENC_TYPE_AC3_FIXED   0
+#define AC3ENC_TYPE_AC3         1
+#define AC3ENC_TYPE_EAC3        2
+
+#define AC3ENC_TYPE 12354
+#include "ac3enc_opts_template.c"
+
 static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
-                                ff_ac3_options, LIBAVUTIL_VERSION_INT };
+                                eac3_options, LIBAVUTIL_VERSION_INT };
 
 static av_cold AVCodec *get_codec(enum AVSampleFormat s){
 #if CONFIG_AC3_FIXED_ENCODER
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 01ffdf37d2..2ab24db561 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -100,6 +100,7 @@ static void scale_coefficients(AC3EncodeContext *s)
 }
 
 
+#if CONFIG_AC3_ENCODER
 AVCodec ff_ac3_float_encoder = {
     "ac3_float",
     AVMEDIA_TYPE_AUDIO,
@@ -114,3 +115,20 @@ AVCodec ff_ac3_float_encoder = {
     .priv_class = &ac3enc_class,
     .channel_layouts = ff_ac3_channel_layouts,
 };
+#endif
+
+#if CONFIG_EAC3_ENCODER
+AVCodec ff_eac3_encoder = {
+    .name            = "eac3",
+    .type            = AVMEDIA_TYPE_AUDIO,
+    .id              = CODEC_ID_EAC3,
+    .priv_data_size  = sizeof(AC3EncodeContext),
+    .init            = ac3_encode_init,
+    .encode          = ac3_encode_frame,
+    .close           = ac3_encode_close,
+    .sample_fmts     = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
+    .long_name       = NULL_IF_CONFIG_SMALL("ATSC A/52 E-AC-3"),
+    .priv_class      = &eac3enc_class,
+    .channel_layouts = ff_ac3_channel_layouts,
+};
+#endif
diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
new file mode 100644
index 0000000000..699c1b5982
--- /dev/null
+++ b/libavcodec/ac3enc_opts_template.c
@@ -0,0 +1,81 @@
+/*
+ * AC-3 encoder options
+ * Copyright (c) 2010 Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if AC3ENC_TYPE == AC3ENC_TYPE_AC3_FIXED
+static const AVOption ac3fixed_options[] = {
+#elif AC3ENC_TYPE == AC3ENC_TYPE_AC3
+static const AVOption ac3_options[] = {
+#else /* AC3ENC_TYPE_EAC3 */
+static const AVOption eac3_options[] = {
+#endif
+#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3
+/* Metadata Options */
+{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+/* downmix levels */
+{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM},
+{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM},
+/* audio production information */
+{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM},
+{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"large",        "Large Room",              0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+/* other metadata options */
+{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+#endif
+{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM},
+#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3
+{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"},
+    {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"off",          "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT,   {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
+/* extended bitstream information */
+{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"ltrt", "Lt/Rt Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"loro", "Lo/Ro Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"},
+    {"notindicated", "Not Indicated (default)",       0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"on",           "Dolby Surround EX Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"off",          "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"},
+    {"notindicated", "Not Indicated (default)",     0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"on",           "Dolby Headphone Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"off",          "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
+    {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+    {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+#endif
+/* Other Encoding Options */
+{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
+#if AC3ENC_TYPE != AC3ENC_TYPE_AC3_FIXED
+{"channel_coupling",   "Channel Coupling",   OFFSET(channel_coupling),   FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
+{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
+#endif
+{NULL}
+};
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 2ad95bd790..e6305cf7d2 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -253,7 +253,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (COOK, cook);
     REGISTER_ENCDEC  (DCA, dca);
     REGISTER_DECODER (DSICINAUDIO, dsicinaudio);
-    REGISTER_DECODER (EAC3, eac3);
+    REGISTER_ENCDEC  (EAC3, eac3);
     REGISTER_ENCDEC  (FLAC, flac);
     REGISTER_DECODER (GSM, gsm);
     REGISTER_DECODER (GSM_MS, gsm_ms);
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index a5abfdd128..a5a5dfab64 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -1,5 +1,4 @@
 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
-                                          arm/ac3dsp_arm.o
 
 OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
 
diff --git a/libavcodec/arm/aac.h b/libavcodec/arm/aac.h
index 390cdbfcf9..3b14c094c6 100644
--- a/libavcodec/arm/aac.h
+++ b/libavcodec/arm/aac.h
@@ -30,17 +30,17 @@ static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                            const float *scale)
 {
     unsigned v0, v1;
-    __asm__ volatile ("ubfx     %0,  %4,  #0, #4      \n\t"
-                      "ubfx     %1,  %4,  #4, #4      \n\t"
-                      "ldr      %0,  [%3, %0, lsl #2] \n\t"
-                      "ldr      %1,  [%3, %1, lsl #2] \n\t"
-                      "vld1.32  {d1[]},   [%5,:32]    \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "vmul.f32 d0,  d0,  d1          \n\t"
-                      "vst1.32  {d0},     [%2,:64]!   \n\t"
-                      : "=&r"(v0), "=&r"(v1), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1");
+    __asm__ ("ubfx     %0,  %6,  #0, #4      \n\t"
+             "ubfx     %1,  %6,  #4, #4      \n\t"
+             "ldr      %0,  [%5, %0, lsl #2] \n\t"
+             "ldr      %1,  [%5, %1, lsl #2] \n\t"
+             "vld1.32  {d1[]},   [%7,:32]    \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "vmul.f32 d0,  d0,  d1          \n\t"
+             "vst1.32  {d0},     [%2,:64]!   \n\t"
+             : "=&r"(v0), "=&r"(v1), "+r"(dst), "=m"(dst[0]), "=m"(dst[1])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "d0", "d1");
     return dst;
 }
 
@@ -49,22 +49,23 @@ static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                            const float *scale)
 {
     unsigned v0, v1, v2, v3;
-    __asm__ volatile ("ubfx     %0,  %6,  #0, #2      \n\t"
-                      "ubfx     %1,  %6,  #2, #2      \n\t"
-                      "ldr      %0,  [%5, %0, lsl #2] \n\t"
-                      "ubfx     %2,  %6,  #4, #2      \n\t"
-                      "ldr      %1,  [%5, %1, lsl #2] \n\t"
-                      "ubfx     %3,  %6,  #6, #2      \n\t"
-                      "ldr      %2,  [%5, %2, lsl #2] \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "ldr      %3,  [%5, %3, lsl #2] \n\t"
-                      "vld1.32  {d2[],d3[]},[%7,:32]  \n\t"
-                      "vmov     d1,  %2,  %3          \n\t"
-                      "vmul.f32 q0,  q0,  q1          \n\t"
-                      "vst1.32  {q0},     [%4,:128]!  \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1", "d2", "d3");
+    __asm__ ("ubfx     %0,  %10, #0, #2      \n\t"
+             "ubfx     %1,  %10, #2, #2      \n\t"
+             "ldr      %0,  [%9, %0, lsl #2] \n\t"
+             "ubfx     %2,  %10, #4, #2      \n\t"
+             "ldr      %1,  [%9, %1, lsl #2] \n\t"
+             "ubfx     %3,  %10, #6, #2      \n\t"
+             "ldr      %2,  [%9, %2, lsl #2] \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "ldr      %3,  [%9, %3, lsl #2] \n\t"
+             "vld1.32  {d2[],d3[]},[%11,:32] \n\t"
+             "vmov     d1,  %2,  %3          \n\t"
+             "vmul.f32 q0,  q0,  q1          \n\t"
+             "vst1.32  {q0},     [%4,:128]!  \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "d0", "d1", "d2", "d3");
     return dst;
 }
 
@@ -73,22 +74,23 @@ static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                             unsigned sign, const float *scale)
 {
     unsigned v0, v1, v2, v3;
-    __asm__ volatile ("ubfx     %0,  %6,  #0, #4      \n\t"
-                      "ubfx     %1,  %6,  #4, #4      \n\t"
-                      "ldr      %0,  [%5, %0, lsl #2] \n\t"
-                      "lsl      %2,  %8,  #30         \n\t"
-                      "ldr      %1,  [%5, %1, lsl #2] \n\t"
-                      "lsl      %3,  %8,  #31         \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "bic      %2,  %2,  #1<<30      \n\t"
-                      "vld1.32  {d1[]},   [%7,:32]    \n\t"
-                      "vmov     d2,  %2,  %3          \n\t"
-                      "veor     d0,  d0,  d2          \n\t"
-                      "vmul.f32 d0,  d0,  d1          \n\t"
-                      "vst1.32  {d0},     [%4,:64]!   \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale), "r"(sign)
-                      : "d0", "d1", "d2");
+    __asm__ ("ubfx     %0,  %8,  #0, #4      \n\t"
+             "ubfx     %1,  %8,  #4, #4      \n\t"
+             "ldr      %0,  [%7, %0, lsl #2] \n\t"
+             "lsl      %2,  %10, #30         \n\t"
+             "ldr      %1,  [%7, %1, lsl #2] \n\t"
+             "lsl      %3,  %10, #31         \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "bic      %2,  %2,  #1<<30      \n\t"
+             "vld1.32  {d1[]},   [%9,:32]    \n\t"
+             "vmov     d2,  %2,  %3          \n\t"
+             "veor     d0,  d0,  d2          \n\t"
+             "vmul.f32 d0,  d0,  d1          \n\t"
+             "vst1.32  {d0},     [%4,:64]!   \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "=m"(dst[0]), "=m"(dst[1])
+             : "r"(v), "r"(idx), "r"(scale), "r"(sign)
+             : "d0", "d1", "d2");
     return dst;
 }
 
@@ -97,38 +99,39 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                             unsigned sign, const float *scale)
 {
     unsigned v0, v1, v2, v3, nz;
-    __asm__ volatile ("vld1.32  {d2[],d3[]},[%9,:32]  \n\t"
-                      "ubfx     %0,  %8,  #0, #2      \n\t"
-                      "ubfx     %1,  %8,  #2, #2      \n\t"
-                      "ldr      %0,  [%7, %0, lsl #2] \n\t"
-                      "ubfx     %2,  %8,  #4, #2      \n\t"
-                      "ldr      %1,  [%7, %1, lsl #2] \n\t"
-                      "ubfx     %3,  %8,  #6, #2      \n\t"
-                      "ldr      %2,  [%7, %2, lsl #2] \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "ldr      %3,  [%7, %3, lsl #2] \n\t"
-                      "lsr      %6,  %8,  #12         \n\t"
-                      "rbit     %6,  %6               \n\t"
-                      "vmov     d1,  %2,  %3          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %0,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %1,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %2,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "vmov     d4,  %0,  %1          \n\t"
-                      "and      %3,  %5,  #1<<31      \n\t"
-                      "vmov     d5,  %2,  %3          \n\t"
-                      "veor     q0,  q0,  q2          \n\t"
-                      "vmul.f32 q0,  q0,  q1          \n\t"
-                      "vst1.32  {q0},     [%4,:128]!  \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
-                        "+r"(sign), "=r"(nz)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1", "d2", "d3", "d4", "d5");
+    __asm__ ("vld1.32  {d2[],d3[]},[%13,:32] \n\t"
+             "ubfx     %0,  %12, #0, #2      \n\t"
+             "ubfx     %1,  %12, #2, #2      \n\t"
+             "ldr      %0,  [%11,%0, lsl #2] \n\t"
+             "ubfx     %2,  %12, #4, #2      \n\t"
+             "ldr      %1,  [%11,%1, lsl #2] \n\t"
+             "ubfx     %3,  %12, #6, #2      \n\t"
+             "ldr      %2,  [%11,%2, lsl #2] \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "ldr      %3,  [%11,%3, lsl #2] \n\t"
+             "lsr      %6,  %12, #12         \n\t"
+             "rbit     %6,  %6               \n\t"
+             "vmov     d1,  %2,  %3          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %0,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %1,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %2,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "vmov     d4,  %0,  %1          \n\t"
+             "and      %3,  %5,  #1<<31      \n\t"
+             "vmov     d5,  %2,  %3          \n\t"
+             "veor     q0,  q0,  q2          \n\t"
+             "vmul.f32 q0,  q0,  q1          \n\t"
+             "vst1.32  {q0},     [%4,:128]!  \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "+r"(sign), "=r"(nz),
+               "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "cc", "d0", "d1", "d2", "d3", "d4", "d5");
     return dst;
 }
 
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
deleted file mode 100644
index d7d498e41f..0000000000
--- a/libavcodec/arm/ac3dsp_arm.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "asm.S"
-
-function ff_ac3_compute_mantissa_size_arm, export=1
-        push            {r4-r8,lr}
-        ldm             r0,  {r4-r8}
-        mov             r3,  r0
-        mov             r0,  #0
-1:
-        ldrb            lr,  [r1], #1
-        subs            r2,  r2,  #1
-        blt             2f
-        cmp             lr,  #4
-        bgt             3f
-        subs            lr,  lr,  #1
-        addlt           r4,  r4,  #1
-        addeq           r5,  r5,  #1
-        ble             1b
-        subs            lr,  lr,  #2
-        addlt           r6,  r6,  #1
-        addeq           r7,  r7,  #1
-        addgt           r8,  r8,  #1
-        b               1b
-3:
-        cmp             lr,  #14
-        sublt           lr,  lr,  #1
-        addgt           r0,  r0,  #16
-        addle           r0,  r0,  lr
-        b               1b
-2:
-        stm             r3,  {r4-r8}
-        pop             {r4-r8,pc}
-endfunc
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index 9f01507853..65790cdc51 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -39,8 +39,6 @@ int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs);
 
 av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
 {
-    c->compute_mantissa_size     = ff_ac3_compute_mantissa_size_arm;
-
     if (HAVE_ARMV6) {
         c->bit_alloc_calc_bap    = ff_ac3_bit_alloc_calc_bap_armv6;
     }
diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 2244fa19ae..b4fb371739 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -97,7 +97,7 @@ static inline av_const int MUL16(int ra, int rb)
 static inline av_const int mid_pred(int a, int b, int c)
 {
     int m;
-    __asm__ volatile (
+    __asm__ (
         "mov   %0, %2  \n\t"
         "cmp   %1, %2  \n\t"
         "movgt %0, %1  \n\t"
@@ -107,7 +107,8 @@ static inline av_const int mid_pred(int a, int b, int c)
         "cmp   %0, %1  \n\t"
         "movgt %0, %1  \n\t"
         : "=&r"(m), "+r"(a)
-        : "r"(b), "r"(c));
+        : "r"(b), "r"(c)
+        : "cc");
     return m;
 }
 
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
index 50a164d51c..cd02579e5b 100644
--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@@ -31,24 +31,25 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
     unsigned high      = c->high << shift;
     unsigned bit;
 
-    __asm__ volatile ("adds    %3,  %3,  %0           \n"
-                      "cmpcs   %7,  %4                \n"
-                      "ldrcsh  %2,  [%4], #2          \n"
-                      "rsb     %0,  %6,  #256         \n"
-                      "smlabb  %0,  %5,  %6,  %0      \n"
-                      "rev16cs %2,  %2                \n"
-                      "orrcs   %1,  %1,  %2,  lsl %3  \n"
-                      "subcs   %3,  %3,  #16          \n"
-                      "lsr     %0,  %0,  #8           \n"
-                      "cmp     %1,  %0,  lsl #16      \n"
-                      "subge   %1,  %1,  %0,  lsl #16 \n"
-                      "subge   %0,  %5,  %0           \n"
-                      "movge   %2,  #1                \n"
-                      "movlt   %2,  #0                \n"
-                      : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
-                        "+&r"(c->bits), "+&r"(c->buffer)
-                      : "r"(high), "r"(pr), "r"(c->end - 1),
-                        "0"(shift), "1"(code_word));
+    __asm__ ("adds    %3,  %3,  %0           \n"
+             "cmpcs   %7,  %4                \n"
+             "ldrcsh  %2,  [%4], #2          \n"
+             "rsb     %0,  %6,  #256         \n"
+             "smlabb  %0,  %5,  %6,  %0      \n"
+             "rev16cs %2,  %2                \n"
+             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+             "subcs   %3,  %3,  #16          \n"
+             "lsr     %0,  %0,  #8           \n"
+             "cmp     %1,  %0,  lsl #16      \n"
+             "subge   %1,  %1,  %0,  lsl #16 \n"
+             "subge   %0,  %5,  %0           \n"
+             "movge   %2,  #1                \n"
+             "movlt   %2,  #0                \n"
+             : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
+               "+&r"(c->bits), "+&r"(c->buffer)
+             : "r"(high), "r"(pr), "r"(c->end - 1),
+               "0"(shift), "1"(code_word)
+             : "cc");
 
     return bit;
 }
@@ -62,19 +63,20 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
     unsigned low;
     unsigned tmp;
 
-    __asm__ volatile ("adds    %3,  %3,  %0           \n"
-                      "cmpcs   %7,  %4                \n"
-                      "ldrcsh  %2,  [%4], #2          \n"
-                      "rsb     %0,  %6,  #256         \n"
-                      "smlabb  %0,  %5,  %6,  %0      \n"
-                      "rev16cs %2,  %2                \n"
-                      "orrcs   %1,  %1,  %2,  lsl %3  \n"
-                      "subcs   %3,  %3,  #16          \n"
-                      "lsr     %0,  %0,  #8           \n"
-                      "lsl     %2,  %0,  #16          \n"
-                      : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
-                        "+&r"(c->bits), "+&r"(c->buffer)
-                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift));
+    __asm__ ("adds    %3,  %3,  %0           \n"
+             "cmpcs   %7,  %4                \n"
+             "ldrcsh  %2,  [%4], #2          \n"
+             "rsb     %0,  %6,  #256         \n"
+             "smlabb  %0,  %5,  %6,  %0      \n"
+             "rev16cs %2,  %2                \n"
+             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+             "subcs   %3,  %3,  #16          \n"
+             "lsr     %0,  %0,  #8           \n"
+             "lsl     %2,  %0,  #16          \n"
+             : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
+               "+&r"(c->bits), "+&r"(c->buffer)
+             : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)
+             : "cc");
 
     if (code_word >= tmp) {
         c->high      = high - low;
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index b67f893f22..99c5df340e 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -27,6 +27,11 @@ SECTION_RODATA
 ; 16777216.0f - used in ff_float_to_fixed24()
 pf_1_24: times 4 dd 0x4B800000
 
+; used in ff_ac3_compute_mantissa_size()
+cextern ac3_bap_bits
+pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
+pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
 %endif
     ja .loop
     REP_RET
+
+;------------------------------------------------------------------------------
+; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
+;------------------------------------------------------------------------------
+
+%macro PHADDD4 2 ; xmm src, xmm tmp
+    movhlps  %2, %1
+    paddd    %1, %2
+    pshufd   %2, %1, 0x1
+    paddd    %1, %2
+%endmacro
+
+INIT_XMM
+cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
+    movdqa      m0, [mant_cntq      ]
+    movdqa      m1, [mant_cntq+ 1*16]
+    paddw       m0, [mant_cntq+ 2*16]
+    paddw       m1, [mant_cntq+ 3*16]
+    paddw       m0, [mant_cntq+ 4*16]
+    paddw       m1, [mant_cntq+ 5*16]
+    paddw       m0, [mant_cntq+ 6*16]
+    paddw       m1, [mant_cntq+ 7*16]
+    paddw       m0, [mant_cntq+ 8*16]
+    paddw       m1, [mant_cntq+ 9*16]
+    paddw       m0, [mant_cntq+10*16]
+    paddw       m1, [mant_cntq+11*16]
+    pmaddwd     m0, [ac3_bap_bits   ]
+    pmaddwd     m1, [ac3_bap_bits+16]
+    paddd       m0, m1
+    PHADDD4     m0, m1
+    movd      sumd, m0
+    movdqa      m3, [pw_bap_mul1]
+    movhpd      m0, [mant_cntq     +2]
+    movlpd      m0, [mant_cntq+1*32+2]
+    movhpd      m1, [mant_cntq+2*32+2]
+    movlpd      m1, [mant_cntq+3*32+2]
+    movhpd      m2, [mant_cntq+4*32+2]
+    movlpd      m2, [mant_cntq+5*32+2]
+    pmulhuw     m0, m3
+    pmulhuw     m1, m3
+    pmulhuw     m2, m3
+    paddusw     m0, m1
+    paddusw     m0, m2
+    pmaddwd     m0, [pw_bap_mul2]
+    PHADDD4     m0, m1
+    movd       eax, m0
+    add        eax, sumd
+    RET
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 97d0657aa6..e853b8831a 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i
 extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
 extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
 
+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
     int mm_flags = av_get_cpu_flags();
@@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index bbb606b935..5a9043bc24 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -29,6 +29,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
 
 #include <dc1394/dc1394.h>
 
@@ -42,6 +43,7 @@ typedef struct dc1394_data {
     int current_frame;
     int fps;
     char *video_size;       /**< String describing video size, set by a private option. */
+    char *pixel_format;     /**< Set by a private option. */
 
     AVPacket packet;
 } dc1394_data;
@@ -82,6 +84,7 @@ struct dc1394_frame_rate {
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "qvga"}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "uyvy422"}, 0, 0, DEC },
     { NULL },
 };
 
@@ -134,6 +137,14 @@ static int dc1394_read_header(AVFormatContext *c, AVFormatParameters * ap)
         goto out_camera;
     }
 
+    if (dc1394->pixel_format) {
+        if ((ap->pix_fmt = av_get_pix_fmt(dc1394->pixel_format)) == PIX_FMT_NONE) {
+            av_log(c, AV_LOG_ERROR, "No such pixel format: %s.\n", dc1394->pixel_format);
+            ret = AVERROR(EINVAL);
+            goto out;
+        }
+    }
+
     if (dc1394->video_size) {
         if ((ret = av_parse_video_size(&ap->width, &ap->height, dc1394->video_size)) < 0) {
             av_log(c, AV_LOG_ERROR, "Couldn't parse video size.\n");
@@ -303,6 +314,8 @@ out_camera:
     dc1394_video_set_transmission(dc1394->camera, DC1394_OFF);
     dc1394_camera_free (dc1394->camera);
 out:
+    av_freep(&dc1394->video_size);
+    av_freep(&dc1394->pixel_format);
     dc1394_free(dc1394->d);
     return ret;
 }
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 29ca01e7b5..9adf367fcb 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -50,6 +50,7 @@
 #include "libavutil/opt.h"
 #include "avdevice.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
 
 static const int desired_video_buffers = 256;
 
@@ -74,6 +75,7 @@ struct video_data {
     char *standard;
     int channel;
     char *video_size; /**< String describing video size, set by a private option. */
+    char *pixel_format; /**< Set by a private option. */
 };
 
 struct buff_data {
@@ -534,12 +536,12 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
 }
 
 static uint32_t device_try_init(AVFormatContext *s1,
-                                const AVFormatParameters *ap,
+                                enum PixelFormat pix_fmt,
                                 int *width,
                                 int *height,
                                 enum CodecID *codec_id)
 {
-    uint32_t desired_format = fmt_ff2v4l(ap->pix_fmt, s1->video_codec_id);
+    uint32_t desired_format = fmt_ff2v4l(pix_fmt, s1->video_codec_id);
 
     if (desired_format == 0 ||
         device_init(s1, width, height, desired_format) < 0) {
@@ -572,6 +574,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     int res = 0;
     uint32_t desired_format, capabilities;
     enum CodecID codec_id;
+    enum PixelFormat pix_fmt = PIX_FMT_NONE;
 
     st = av_new_stream(s1, 0);
     if (!st) {
@@ -584,11 +587,18 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
         goto out;
     }
+    if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) {
+        av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format);
+        res = AVERROR(EINVAL);
+        goto out;
+    }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->width > 0)
         s->width  = ap->width;
     if (ap->height > 0)
         s->height = ap->height;
+    if (ap->pix_fmt)
+        pix_fmt = ap->pix_fmt;
 #endif
 
     capabilities = 0;
@@ -614,7 +624,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
         av_log(s1, AV_LOG_VERBOSE, "Setting frame size to %dx%d\n", s->width, s->height);
     }
 
-    desired_format = device_try_init(s1, ap, &s->width, &s->height, &codec_id);
+    desired_format = device_try_init(s1, pix_fmt, &s->width, &s->height, &codec_id);
     if (desired_format == 0) {
         av_log(s1, AV_LOG_ERROR, "Cannot find a proper format for "
                "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, ap->pix_fmt);
@@ -659,6 +669,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
 out:
     av_freep(&s->video_size);
+    av_freep(&s->pixel_format);
     return res;
 }
 
@@ -709,6 +720,7 @@ static const AVOption options[] = {
     { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
     { "channel",  "", OFFSET(channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { NULL },
 };
 
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 292479e3d6..41830adcac 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -233,9 +233,7 @@ typedef struct AVFormatParameters {
     attribute_deprecated int channels;
     attribute_deprecated int width;
     attribute_deprecated int height;
-#endif
-    enum PixelFormat pix_fmt;
-#if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated enum PixelFormat pix_fmt;
     attribute_deprecated int channel; /**< Used to select DV channel. */
     attribute_deprecated const char *standard; /**< deprecated, use demuxer-specific options instead. */
     attribute_deprecated unsigned int mpeg2ts_raw:1;  /**< deprecated, use mpegtsraw demuxer */
diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 32b04c5a4b..ab7e3269fa 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -223,6 +223,24 @@ typedef enum {
   MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP = 3,
 } MatroskaTrackEncodingCompAlgo;
 
+typedef enum {
+  MATROSKA_VIDEO_STEREOMODE_TYPE_MONO               = 0,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_LEFT_RIGHT         = 1,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTTOM_TOP         = 2,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM         = 3,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_RL    = 4,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_LR    = 5,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_RL = 6,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_LR = 7,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_RL = 8,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_LR = 9,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_CYAN_RED  = 10,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT         = 11,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_GREEN_MAG = 12,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_LR = 13,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL = 14,
+} MatroskaVideoStereoModeType;
+
 /*
  * Matroska Codec IDs, strings
  */
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index a1617f00c4..fa25c6ba0e 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -25,6 +25,7 @@
 #include "rawdec.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
 
 /* raw input */
 int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
@@ -70,30 +71,37 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
         case AVMEDIA_TYPE_VIDEO: {
             FFRawVideoDemuxerContext *s1 = s->priv_data;
             int width = 0, height = 0, ret;
+            enum PixelFormat pix_fmt;
+
             if(ap->time_base.num)
                 av_set_pts_info(st, 64, ap->time_base.num, ap->time_base.den);
             else
                 av_set_pts_info(st, 64, 1, 25);
-            if (s1->video_size) {
-                ret = av_parse_video_size(&width, &height, s1->video_size);
-                av_freep(&s1->video_size);
-                if (ret < 0) {
-                    av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
-                    return ret;
-                }
+            if (s1->video_size && (ret = av_parse_video_size(&width, &height, s1->video_size)) < 0) {
+                av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
+                goto fail;
+            }
+            if ((pix_fmt = av_get_pix_fmt(s1->pixel_format)) == PIX_FMT_NONE) {
+                av_log(s, AV_LOG_ERROR, "No such pixel format: %s.\n", s1->pixel_format);
+                ret = AVERROR(EINVAL);
+                goto fail;
             }
 #if FF_API_FORMAT_PARAMETERS
             if (ap->width > 0)
                 width = ap->width;
             if (ap->height > 0)
                 height = ap->height;
+            if (ap->pix_fmt)
+                pix_fmt = ap->pix_fmt;
 #endif
             st->codec->width  = width;
             st->codec->height = height;
-            st->codec->pix_fmt = ap->pix_fmt;
-            if(st->codec->pix_fmt == PIX_FMT_NONE)
-                st->codec->pix_fmt= PIX_FMT_YUV420P;
+            st->codec->pix_fmt = pix_fmt;
             break;
+fail:
+            av_freep(&s1->video_size);
+            av_freep(&s1->pixel_format);
+            return ret;
             }
         default:
             return -1;
@@ -188,6 +196,7 @@ const AVClass ff_rawaudio_demuxer_class = {
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption video_options[] = {
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "yuv420p"}, 0, 0, DEC },
     { NULL },
 };
 #undef OFFSET
diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h
index 3eed6e711d..f5d40c063f 100644
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@@ -34,6 +34,7 @@ typedef struct RawAudioDemuxerContext {
 typedef struct FFRawVideoDemuxerContext {
     const AVClass *class;     /**< Class for private options. */
     char *video_size;         /**< String describing video size, set by a private option. */
+    char *pixel_format;       /**< Set by a private option. */
 } FFRawVideoDemuxerContext;
 
 extern const AVClass ff_rawaudio_demuxer_class;
diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index 8eb346cd64..4130177549 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -33,12 +33,12 @@
 #define FASTDIV FASTDIV
 static av_always_inline av_const int FASTDIV(int a, int b)
 {
-    int r, t;
-    __asm__ ("cmp     %3, #2               \n\t"
-             "ldr     %1, [%4, %3, lsl #2] \n\t"
-             "lsrle   %0, %2, #1           \n\t"
-             "smmulgt %0, %1, %2           \n\t"
-             : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
+    int r;
+    __asm__ ("cmp     %2, #2               \n\t"
+             "ldr     %0, [%3, %2, lsl #2] \n\t"
+             "lsrle   %0, %1, #1           \n\t"
+             "smmulgt %0, %0, %1           \n\t"
+             : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc");
     return r;
 }
 
diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index fad6d59d87..0d19212afa 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@@ -24,6 +24,7 @@
 
 #include "pixfmt.h"
 #include <inttypes.h>
+#include "pixfmt.h"
 
 typedef struct AVComponentDescriptor{
     uint16_t plane        :2;            ///< which of the 4 planes contains the component
diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S
index 5af46540a8..cb8d71253c 100644
--- a/libswscale/bfin/internal_bfin.S
+++ b/libswscale/bfin/internal_bfin.S
@@ -466,8 +466,8 @@ DEFUN_END(yuv2rgb24_line)
 #define ARG_srcStride   40
 
 DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
+                         int width, int height,
+                         int lumStride, int chromStride, int srcStride)):
         link 0;
         [--sp] = (r7:4,p5:4);
 
@@ -539,8 +539,8 @@ DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8
 DEFUN_END(uyvytoyv12)
 
 DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
+                         int width, int height,
+                         int lumStride, int chromStride, int srcStride)):
         link 0;
         [--sp] = (r7:4,p5:4);
 
diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index 01a772f4fc..4b26ba67c2 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -38,12 +38,12 @@
 #endif
 
 int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                       long width, long height,
-                       long lumStride, long chromStride, long srcStride) L1CODE;
+                       int width, int height,
+                       int lumStride, int chromStride, int srcStride) L1CODE;
 
 int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                       long width, long height,
-                       long lumStride, long chromStride, long srcStride) L1CODE;
+                       int width, int height,
+                       int lumStride, int chromStride, int srcStride) L1CODE;
 
 static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dst[], int dstStride[])
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 2c17f7dd0f..34095d8532 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -52,7 +52,7 @@ int main(int argc, char **argv)
             int src_bpp;
             int dst_bpp;
             const char *name;
-            void (*func)(const uint8_t *src, uint8_t *dst, long src_size);
+            void (*func)(const uint8_t *src, uint8_t *dst, int src_size);
         } func_info[] = {
             FUNC(2, 2, rgb15to16),
             FUNC(2, 3, rgb15to24),
diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index d142c62e61..9a93252502 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -29,13 +29,13 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
     register int i;
     vector unsigned int altivec_vectorShiftInt19 =
         vec_add(vec_splat_u32(10), vec_splat_u32(9));
-    if ((unsigned long)dest % 16) {
+    if ((unsigned int)dest % 16) {
         /* badly aligned store, we force store alignment */
         /* and will handle load misalignment on val w/ vec_perm */
         vector unsigned char perm1;
         vector signed int v1;
         for (i = 0 ; (i < dstW) &&
-            (((unsigned long)dest + i) % 16) ; i++) {
+            (((unsigned int)dest + i) % 16) ; i++) {
                 int t = val[i] >> 19;
                 dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
         }
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c
index 7eb2e3e28a..20a81e5662 100644
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@@ -30,7 +30,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **chrVSrc, int chrFilterSize,
                                     const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
     yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@@ -45,7 +45,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc, int chrFilterSize,
                                        const int16_t **alpSrc, uint8_t *dest,
-                                       long dstW, long dstY)
+                                       int dstW, int dstY)
 {
     /* The following list of supported dstFormat values should
        match what's found in the body of ff_yuv2packedX_altivec() */
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 45d4ca7347..06f8df8239 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -793,7 +793,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
 
     vector signed short   RND = vec_splat_s16(1<<3);
     vector unsigned short SCL = vec_splat_u16(4);
-    DECLARE_ALIGNED(16, unsigned long, scratch)[16];
+    DECLARE_ALIGNED(16, unsigned int, scratch)[16];
 
     vector signed short *YCoeffs, *CCoeffs;
 
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index e7f2990b00..84ef43b774 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -29,70 +29,70 @@
 #include "swscale.h"
 #include "swscale_internal.h"
 
-void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
 
 void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int dstStride);
 void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int dstStride);
 void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
+                      int width, int height,
+                      int lumStride, int chromStride, int dstStride);
 void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
+                      int width, int height,
+                      int lumStride, int chromStride, int dstStride);
 void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                   long width, long height,
-                   long lumStride, long chromStride, long srcStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int srcStride);
 void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                    long width, long height,
-                    long lumStride, long chromStride, long srcStride);
-void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                 long srcStride, long dstStride);
+                    int width, int height,
+                    int lumStride, int chromStride, int srcStride);
+void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                 int srcStride, int dstStride);
 void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                        long width, long height, long src1Stride,
-                        long src2Stride, long dstStride);
+                        int width, int height, int src1Stride,
+                        int src2Stride, int dstStride);
 void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                     uint8_t *dst1, uint8_t *dst2,
-                    long width, long height,
-                    long srcStride1, long srcStride2,
-                    long dstStride1, long dstStride2);
+                    int width, int height,
+                    int srcStride1, int srcStride2,
+                    int dstStride1, int dstStride2);
 void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                      uint8_t *dst,
-                     long width, long height,
-                     long srcStride1, long srcStride2,
-                     long srcStride3, long dstStride);
+                     int width, int height,
+                     int srcStride1, int srcStride2,
+                     int srcStride3, int dstStride);
 void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 
 #define RGB2YUV_SHIFT 8
 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
@@ -151,10 +151,10 @@ void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
 }
 #endif
 
-void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 2;
+    int i;
+    int num_pixels = src_size >> 2;
     for (i=0; i<num_pixels; i++) {
 #if HAVE_BIGENDIAN
         /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
@@ -169,9 +169,9 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
+    int i;
     for (i=0; 3*i<src_size; i++) {
 #if HAVE_BIGENDIAN
         /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
@@ -188,7 +188,7 @@ void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -211,7 +211,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -226,10 +226,10 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned rgb = ((const uint16_t*)src)[i];
@@ -237,10 +237,10 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned rgb = ((const uint16_t*)src)[i];
@@ -248,7 +248,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -271,7 +271,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -286,10 +286,10 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned rgb = ((const uint16_t*)src)[i];
@@ -297,10 +297,10 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;
 
     for (i=0; i<num_pixels; i++) {
         unsigned br;
@@ -310,10 +310,10 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
+void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size;
+    int i;
+    int num_pixels = src_size;
     for (i=0; i<num_pixels; i++) {
         unsigned b,g,r;
         register uint8_t rgb;
@@ -326,9 +326,9 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
 }
 
 #define DEFINE_SHUFFLE_BYTES(a, b, c, d)                                \
-void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, long src_size) \
+void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, int src_size) \
 {                                                                       \
-    long i;                                                             \
+    int i;                                                             \
                                                                         \
     for (i = 0; i < src_size; i+=4) {                                   \
         dst[i + 0] = src[i + a];                                        \
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index f1c0e00533..6923dd9608 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -32,41 +32,41 @@
 #include "libavutil/avutil.h"
 
 /* A full collection of RGB to RGB(BGR) converters */
-extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-
-void rgb24to32   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb32to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void bgr8torgb8  (const uint8_t *src, uint8_t *dst, long src_size);
-
-void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+
+void rgb24to32   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb32to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
+void bgr8torgb8  (const uint8_t *src, uint8_t *dst, int src_size);
+
+void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size);
 
 #if LIBSWSCALE_VERSION_MAJOR < 1
 /* deprecated, use the public versions in swscale.h */
@@ -80,46 +80,46 @@ attribute_deprecated void palette8tobgr16(const uint8_t *src, uint8_t *dst, long
 
 
 void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                   uint8_t *vdst, long width, long height, long lumStride,
-                   long chromStride, long srcStride);
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int dstStride);
 
 /**
  * Width should be a multiple of 16.
  */
 extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
+                             int width, int height,
+                             int lumStride, int chromStride, int dstStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                          long width, long height,
-                          long lumStride, long chromStride, long srcStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int srcStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int dstStride);
 
 /**
  * Width should be a multiple of 16.
  */
 extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
+                             int width, int height,
+                             int lumStride, int chromStride, int dstStride);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 2.
@@ -128,40 +128,40 @@ extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uin
  * FIXME: Write high quality version.
  */
 extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           long width, long height,
-                           long lumStride, long chromStride, long srcStride);
-extern void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                        long srcStride, long dstStride);
+                           int width, int height,
+                           int lumStride, int chromStride, int srcStride);
+extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                        int srcStride, int dstStride);
 
 extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                               long width, long height, long src1Stride,
-                               long src2Stride, long dstStride);
+                               int width, int height, int src1Stride,
+                               int src2Stride, int dstStride);
 
 extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                            uint8_t *dst1, uint8_t *dst2,
-                           long width, long height,
-                           long srcStride1, long srcStride2,
-                           long dstStride1, long dstStride2);
+                           int width, int height,
+                           int srcStride1, int srcStride2,
+                           int dstStride1, int dstStride2);
 
 extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                             uint8_t *dst,
-                            long width, long height,
-                            long srcStride1, long srcStride2,
-                            long srcStride3, long dstStride);
+                            int width, int height,
+                            int srcStride1, int srcStride2,
+                            int srcStride3, int dstStride);
 
 
 extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 
 void sws_rgb2rgb_init(void);
 
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index a86f21ed29..beb7c34e0c 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -26,7 +26,7 @@
 
 #include <stddef.h>
 
-static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -50,7 +50,7 @@ static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -81,7 +81,7 @@ static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
  MMX2, 3DNOW optimization by Nick Kurshev
  32-bit C version, and and&add trick by Michael Niedermayer
 */
-static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -101,7 +101,7 @@ static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -122,7 +122,7 @@ static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -135,7 +135,7 @@ static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -147,7 +147,7 @@ static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -159,7 +159,7 @@ static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -171,7 +171,7 @@ static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -185,7 +185,7 @@ static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -199,7 +199,7 @@ static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -213,7 +213,7 @@ static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -248,7 +248,7 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
        |
    original bits
 */
-static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -263,7 +263,7 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = (uint8_t *)dst;
@@ -278,7 +278,7 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
     }
 }
 
-static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -301,7 +301,7 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     uint8_t *d = dst;
@@ -324,7 +324,7 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
     }
 }
 
-static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int idx = 15 - src_size;
     const uint8_t *s = src-idx;
@@ -336,7 +336,7 @@ static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     unsigned i;
     for (i=0; i<src_size; i+=3) {
@@ -350,11 +350,11 @@ static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
 
 static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                      const uint8_t *vsrc, uint8_t *dst,
-                                     long width, long height,
-                                     long lumStride, long chromStride,
-                                     long dstStride, long vertLumPerChroma)
+                                     int width, int height,
+                                     int lumStride, int chromStride,
+                                     int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y++) {
 #if HAVE_FAST_64BIT
@@ -404,9 +404,9 @@ static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                 const uint8_t *vsrc, uint8_t *dst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long dstStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int dstStride)
 {
     //FIXME interpolate chroma
     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@@ -415,11 +415,11 @@ static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 
 static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                      const uint8_t *vsrc, uint8_t *dst,
-                                     long width, long height,
-                                     long lumStride, long chromStride,
-                                     long dstStride, long vertLumPerChroma)
+                                     int width, int height,
+                                     int lumStride, int chromStride,
+                                     int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y++) {
 #if HAVE_FAST_64BIT
@@ -469,9 +469,9 @@ static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                 const uint8_t *vsrc, uint8_t *dst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long dstStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int dstStride)
 {
     //FIXME interpolate chroma
     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@@ -483,9 +483,9 @@ static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                    const uint8_t *vsrc, uint8_t *dst,
-                                   long width, long height,
-                                   long lumStride, long chromStride,
-                                   long dstStride)
+                                   int width, int height,
+                                   int lumStride, int chromStride,
+                                   int dstStride)
 {
     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                       chromStride, dstStride, 1);
@@ -496,9 +496,9 @@ static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                    const uint8_t *vsrc, uint8_t *dst,
-                                   long width, long height,
-                                   long lumStride, long chromStride,
-                                   long dstStride)
+                                   int width, int height,
+                                   int lumStride, int chromStride,
+                                   int dstStride)
 {
     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                       chromStride, dstStride, 1);
@@ -510,14 +510,14 @@ static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                 uint8_t *udst, uint8_t *vdst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long srcStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int srcStride)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y+=2) {
-        long i;
+        int i;
         for (i=0; i<chromWidth; i++) {
             ydst[2*i+0]     = src[4*i+0];
             udst[i]     = src[4*i+1];
@@ -538,10 +538,10 @@ static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
     }
 }
 
-static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
-                              long srcHeight, long srcStride, long dstStride)
+static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
+                              int srcHeight, int srcStride, int dstStride)
 {
-    long x,y;
+    int x,y;
 
     dst[0]= src[0];
 
@@ -598,14 +598,14 @@ static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
  */
 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                 uint8_t *udst, uint8_t *vdst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long srcStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int srcStride)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     for (y=0; y<height; y+=2) {
-        long i;
+        int i;
         for (i=0; i<chromWidth; i++) {
             udst[i]     = src[4*i+0];
             ydst[2*i+0] = src[4*i+1];
@@ -634,14 +634,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
  * FIXME: Write HQ version.
  */
 void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                   uint8_t *vdst, long width, long height, long lumStride,
-                   long chromStride, long srcStride)
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const int chromWidth = width >> 1;
     y=0;
     for (; y<height; y+=2) {
-        long i;
+        int i;
         for (i=0; i<chromWidth; i++) {
             unsigned int b = src[6*i+0];
             unsigned int g = src[6*i+1];
@@ -692,14 +692,14 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
 }
 
 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
-                              uint8_t *dest, long width,
-                              long height, long src1Stride,
-                              long src2Stride, long dstStride)
+                              uint8_t *dest, int width,
+                              int height, int src1Stride,
+                              int src2Stride, int dstStride)
 {
-    long h;
+    int h;
 
     for (h=0; h < height; h++) {
-        long w;
+        int w;
         for (w=0; w < width; w++) {
             dest[2*w+0] = src1[w];
             dest[2*w+1] = src2[w];
@@ -712,12 +712,12 @@ static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
 
 static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                  uint8_t *dst1, uint8_t *dst2,
-                                 long width, long height,
-                                 long srcStride1, long srcStride2,
-                                 long dstStride1, long dstStride2)
+                                 int width, int height,
+                                 int srcStride1, int srcStride2,
+                                 int dstStride1, int dstStride2)
 {
     int y;
-    long x,w,h;
+    int x,w,h;
     w=width/2; h=height/2;
     for (y=0;y<h;y++) {
         const uint8_t* s1=src1+srcStride1*(y>>1);
@@ -735,12 +735,12 @@ static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
 
 static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                   const uint8_t *src3, uint8_t *dst,
-                                  long width, long height,
-                                  long srcStride1, long srcStride2,
-                                  long srcStride3, long dstStride)
+                                  int width, int height,
+                                  int srcStride1, int srcStride2,
+                                  int srcStride3, int dstStride)
 {
     int x;
-    long y,w,h;
+    int y,w,h;
     w=width/2; h=height;
     for (y=0;y<h;y++) {
         const uint8_t* yp=src1+srcStride1*y;
@@ -749,7 +749,7 @@ static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
         uint8_t* d=dst+dstStride*y;
         x=0;
         for (; x<w; x++) {
-            const long x2 = x<<2;
+            const int x2 = x<<2;
             d[8*x+0] = yp[x2];
             d[8*x+1] = up[x];
             d[8*x+2] = yp[x2+1];
@@ -836,11 +836,11 @@ static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
 }
 
 static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src, ydst, width);
@@ -856,11 +856,11 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }
 
 static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src, ydst, width);
@@ -874,11 +874,11 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }
 
 static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src + 1, ydst, width);
@@ -894,11 +894,11 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }
 
 static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         extract_even_c(src + 1, ydst, width);
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index be8491e217..227126b182 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1075,7 +1075,7 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
 }
 
 #define RGB48(name, R, B, READ)\
-static inline void name ## ToY(int16_t *dst, const uint16_t *src, long width, uint32_t *unused)\
+static inline void name ## ToY(int16_t *dst, const uint16_t *src, int width, uint32_t *unused)\
 {\
     int i;\
     for (i = 0; i < width; i++) {\
@@ -1089,7 +1089,7 @@ static inline void name ## ToY(int16_t *dst, const uint16_t *src, long width, ui
 \
 static inline void name ## ToUV(int16_t *dstU, int16_t *dstV,\
                              const uint16_t *src1, const uint16_t *src2,\
-                             long width, uint32_t *unused)\
+                             int width, uint32_t *unused)\
 {\
     int i;\
     assert(src1==src2);\
@@ -1105,7 +1105,7 @@ static inline void name ## ToUV(int16_t *dstU, int16_t *dstV,\
 \
 static inline void name ## ToUV_half(int16_t *dstU, int16_t *dstV,\
                                   const uint16_t *src1, const uint16_t *src2,\
-                                  long width, uint32_t *unused)\
+                                  int width, uint32_t *unused)\
 {\
     int i;\
     assert(src1==src2);\
@@ -1125,7 +1125,7 @@ RGB48(bgr48LE, 2, 0, AV_RL16)
 RGB48(bgr48BE, 2, 0, AV_RB16)
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void name(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)\
+static inline void name(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1146,7 +1146,7 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
-static inline void abgrToA(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void abgrToA(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1155,7 +1155,7 @@ static inline void abgrToA(int16_t *dst, const uint8_t *src, long width, uint32_
 }
 
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1167,7 +1167,7 @@ static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const
         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
     }\
 }\
-static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++) {\
@@ -1194,7 +1194,7 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
-static inline void palToA(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToA(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -1216,7 +1216,7 @@ static inline void palToY(int16_t *dst, const uint8_t *src, long width, uint32_t
 
 static inline void palToUV(uint16_t *dstU, int16_t *dstV,
                            const uint8_t *src1, const uint8_t *src2,
-                           long width, uint32_t *pal)
+                           int width, uint32_t *pal)
 {
     int i;
     assert(src1 == src2);
@@ -1228,7 +1228,7 @@ static inline void palToUV(uint16_t *dstU, int16_t *dstV,
     }
 }
 
-static inline void monowhite2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monowhite2Y(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
@@ -1238,7 +1238,7 @@ static inline void monowhite2Y(int16_t *dst, const uint8_t *src, long width, uin
     }
 }
 
-static inline void monoblack2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monoblack2Y(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
@@ -1431,24 +1431,24 @@ static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStrid
     return srcSliceH;
 }
 
-static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
     for (i=0; i<num_pixels; i++)
         ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | (src[(i<<1)+1] << 24);
 }
 
-static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++)
         ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | src[(i<<1)+1];
 }
 
-static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++) {
         //FIXME slow?
@@ -1464,7 +1464,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
 {
     const enum PixelFormat srcFormat= c->srcFormat;
     const enum PixelFormat dstFormat= c->dstFormat;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
+    void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
                  const uint8_t *palette)=NULL;
     int i;
     uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
@@ -1521,7 +1521,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
     const int dstBpp= (c->dstFormatBpp + 7) >> 3;
     const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
     const int dstId= c->dstFormatBpp >> 2;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
+    void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL;
 
 #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
 
@@ -2085,18 +2085,18 @@ int sws_scale_ordered(SwsContext *c, const uint8_t* const src[], int srcStride[]
 #endif
 
 /* Convert the palette to the same packed 32-bit format as the palette */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++)
         ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]];
 }
 
 /* Palette format: ABCD -> dst format: ABC */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
 
     for (i=0; i<num_pixels; i++) {
         //FIXME slow?
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index f48b1d6210..7ac2fe27bb 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -351,7 +351,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
  * @param num_pixels number of pixels to convert
  * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
  */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
 
 /**
  * Converts an 8bit paletted frame into a frame with a color depth of 24 bits.
@@ -363,7 +363,7 @@ void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pi
  * @param num_pixels number of pixels to convert
  * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
  */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);
 
 
 #endif /* SWSCALE_SWSCALE_H */
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index cac40b22f8..327919fb55 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -194,6 +194,7 @@ typedef struct SwsContext {
 #define Y_TEMP                "11*8+4*4*256*2+40"
 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
 #define UV_OFF                "11*8+4*4*256*3+48"
+#define UV_OFFx2              "11*8+4*4*256*3+56"
 
     DECLARE_ALIGNED(8, uint64_t, redDither);
     DECLARE_ALIGNED(8, uint64_t, greenDither);
@@ -217,6 +218,7 @@ typedef struct SwsContext {
     DECLARE_ALIGNED(8, uint64_t, y_temp);
     int32_t  alpMmxFilter[4*MAX_FILTER_SIZE];
     DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+    DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
 
 #if HAVE_ALTIVEC
     vector signed short   CY;
@@ -259,7 +261,7 @@ typedef struct SwsContext {
                         const int16_t *chrVSrc, const int16_t *alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        long dstW, long chrDstW);
+                        int dstW, int chrDstW);
     void (*yuv2yuvX   )(struct SwsContext *c,
                         const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                         const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -267,7 +269,7 @@ typedef struct SwsContext {
                         const int16_t **alpSrc,
                         uint8_t *dest,
                         uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        long dstW, long chrDstW);
+                        int dstW, int chrDstW);
     void (*yuv2packed1)(struct SwsContext *c,
                         const uint16_t *buf0,
                         const uint16_t *ubuf0, const uint16_t *ubuf1,
@@ -287,26 +289,26 @@ typedef struct SwsContext {
                         const int16_t *chrFilter, const int16_t **chrUSrc,
                         const int16_t **chrVSrc, int chrFilterSize,
                         const int16_t **alpSrc, uint8_t *dest,
-                        long dstW, long dstY);
+                        int dstW, int dstY);
 
     void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
-                      long width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
     void (*alpToYV12)(uint8_t *dst, const uint8_t *src,
-                      long width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
     void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
-                      long width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
     void (*hyscale_fast)(struct SwsContext *c,
-                         int16_t *dst, long dstWidth,
+                         int16_t *dst, int dstWidth,
                          const uint8_t *src, int srcW, int xInc);
     void (*hcscale_fast)(struct SwsContext *c,
-                         int16_t *dst1, int16_t *dst2, long dstWidth,
+                         int16_t *dst1, int16_t *dst2, int dstWidth,
                          const uint8_t *src1, const uint8_t *src2,
                          int srcW, int xInc);
 
     void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW,
                    int xInc, const int16_t *filter, const int16_t *filterPos,
-                   long filterSize);
+                   int filterSize);
 
     void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW,
                    int xInc, const int16_t *filter, const int16_t *filterPos,
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 0f40f5b95e..98de521cb1 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -24,7 +24,7 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                               const int16_t **chrVSrc,
                               int chrFilterSize, const int16_t **alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, long dstW, long chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW)
 {
     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@@ -47,7 +47,7 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
                               const int16_t *chrUSrc, const int16_t *chrVSrc,
                               const int16_t *alpSrc,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, long dstW, long chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW)
 {
     int i;
     for (i=0; i<dstW; i++) {
@@ -79,7 +79,7 @@ static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                                  const int16_t *chrFilter, const int16_t **chrUSrc,
                                  const int16_t **chrVSrc,
                                  int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, long dstW, long dstY)
+                                 uint8_t *dest, int dstW, int dstY)
 {
         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                        chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@@ -128,7 +128,7 @@ static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
 
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                              uint32_t *unused)
 {
     int i;
@@ -137,7 +137,7 @@ static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
 }
 
 static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, long width, uint32_t *unused)
+                              const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -148,7 +148,7 @@ static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }
 
 static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, long width, uint32_t *unused)
+                            const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
@@ -161,7 +161,7 @@ static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                              uint32_t *unused)
 {
     int i;
@@ -170,7 +170,7 @@ static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
 }
 
 static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, long width, uint32_t *unused)
+                              const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -181,7 +181,7 @@ static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }
 
 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, long width, uint32_t *unused)
+                            const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -191,7 +191,7 @@ static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }
 
 static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
-                              const uint8_t *src, long width)
+                              const uint8_t *src, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
@@ -205,7 +205,7 @@ static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
 #define YUV_NBPS(depth, endianness, rfunc) \
 static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                           const uint16_t *srcU, const uint16_t *srcV, \
-                                          long width, uint32_t *unused) \
+                                          int width, uint32_t *unused) \
 { \
     int i; \
     for (i = 0; i < width; i++) { \
@@ -214,7 +214,7 @@ static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
     } \
 } \
 \
-static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, long width, uint32_t *unused) \
+static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, int width, uint32_t *unused) \
 { \
     int i; \
     for (i = 0; i < width; i++) \
@@ -229,20 +229,20 @@ YUV_NBPS(10, BE, AV_RB16)
 
 static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src1, const uint8_t *src2,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstU, dstV, src1, width);
 }
 
 static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src1, const uint8_t *src2,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
 
 static inline void bgr24ToY_c(int16_t *dst, const uint8_t *src,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -255,7 +255,7 @@ static inline void bgr24ToY_c(int16_t *dst, const uint8_t *src,
 }
 
 static inline void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, long width, uint32_t *unused)
+                               const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -270,7 +270,7 @@ static inline void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1
 }
 
 static inline void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, long width, uint32_t *unused)
+                                    const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
@@ -284,7 +284,7 @@ static inline void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, long width,
+static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
                               uint32_t *unused)
 {
     int i;
@@ -298,7 +298,7 @@ static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, long width,
 }
 
 static inline void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, long width, uint32_t *unused)
+                               const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -313,7 +313,7 @@ static inline void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1
 }
 
 static inline void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, long width, uint32_t *unused)
+                                    const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -332,7 +332,7 @@ static inline void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t
 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                             int srcW, int xInc,
                             const int16_t *filter, const int16_t *filterPos,
-                            long filterSize)
+                            int filterSize)
 {
     int i;
     for (i=0; i<dstW; i++) {
@@ -408,7 +408,7 @@ static void lumRangeFromJpeg_c(int16_t *dst, int width)
         dst[i] = (dst[i]*14071 + 33561947)>>14;
 }
 
-static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
+static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                                   const uint8_t *src, int srcW, int xInc)
 {
     int i;
@@ -422,14 +422,14 @@ static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
 }
 
       // *** horizontal scale Y line to temp buffer
-static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
+static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
                              const uint8_t *src, int srcW, int xInc,
                              const int16_t *hLumFilter,
                              const int16_t *hLumFilterPos, int hLumFilterSize,
                              uint8_t *formatConvBuffer,
                              uint32_t *pal, int isAlpha)
 {
-    void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
 
     src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
@@ -453,7 +453,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
 }
 
 static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                  long dstWidth, const uint8_t *src1,
+                                  int dstWidth, const uint8_t *src1,
                                   const uint8_t *src2, int srcW, int xInc)
 {
     int i;
@@ -467,7 +467,7 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
     }
 }
 
-inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, long dstWidth,
+inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                              const uint8_t *src1, const uint8_t *src2,
                              int srcW, int xInc, const int16_t *hChrFilter,
                              const int16_t *hChrFilterPos, int hChrFilterSize,
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 96d6efbb8c..23ad8279cf 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1009,6 +1009,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
     c->uv_off = dst_stride_px;
+    c->uv_offx2 = dst_stride;
     for (i=0; i<c->vChrBufSize; i++) {
         FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail);
         c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index c4245afb34..baef3f8ae5 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -61,7 +61,7 @@
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 
-static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -143,7 +143,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
             MOVNTQ"     %%mm4, 16%0"
 
 
-static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     uint8_t *dest = dst;
     const uint8_t *s = src;
@@ -186,7 +186,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
  MMX2, 3DNOW optimization by Nick Kurshev
  32-bit C version, and and&add trick by Michael Niedermayer
 */
-static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -230,7 +230,7 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     register const uint8_t* s=src;
     register uint8_t* d=dst;
@@ -279,7 +279,7 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -371,7 +371,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -426,7 +426,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -518,7 +518,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -573,7 +573,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -630,7 +630,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -687,7 +687,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -744,7 +744,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint8_t *s = src;
     const uint8_t *end;
@@ -822,7 +822,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
        |
    original bits
 */
-static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -925,7 +925,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
     }
 }
 
-static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -1046,7 +1046,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
     MOVNTQ"     %%mm0,  %0      \n\t"                               \
     MOVNTQ"     %%mm3, 8%0      \n\t"                               \
 
-static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -1088,7 +1088,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     const uint16_t *end;
     const uint16_t *mm_end;
@@ -1130,7 +1130,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
     }
 }
 
-static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     x86_reg idx = 15 - src_size;
     const uint8_t *s = src-idx;
@@ -1192,7 +1192,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
     }
 }
 
-static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
     unsigned i;
     x86_reg mmx_size= 23 - src_size;
@@ -1260,10 +1260,10 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
 }
 
 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+                                           int width, int height,
+                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++) {
         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
@@ -1317,18 +1317,18 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int dstStride)
 {
     //FIXME interpolate chroma
     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
 }
 
 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+                                           int width, int height,
+                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++) {
         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
@@ -1382,8 +1382,8 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int dstStride)
 {
     //FIXME interpolate chroma
     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
@@ -1393,8 +1393,8 @@ static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc,
  * Width should be a multiple of 16.
  */
 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+                                         int width, int height,
+                                         int lumStride, int chromStride, int dstStride)
 {
     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
 }
@@ -1403,8 +1403,8 @@ static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usr
  * Width should be a multiple of 16.
  */
 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+                                         int width, int height,
+                                         int lumStride, int chromStride, int dstStride)
 {
     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
 }
@@ -1414,10 +1414,10 @@ static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usr
  * (If this is a problem for anyone then tell me, and I will fix it.)
  */
 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2) {
         __asm__ volatile(
@@ -1513,9 +1513,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
-static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
 {
-    long x,y;
+    int x,y;
 
     dst[0]= src[0];
 
@@ -1612,10 +1612,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
  * FIXME: Write HQ version.
  */
 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2) {
         __asm__ volatile(
@@ -1718,13 +1718,13 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
  * FIXME: Write HQ version.
  */
 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                       long width, long height,
-                                       long lumStride, long chromStride, long srcStride)
+                                       int width, int height,
+                                       int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height-2; y+=2) {
-        long i;
+        int i;
         for (i=0; i<2; i++) {
             __asm__ volatile(
                 "mov                        %2, %%"REG_a"   \n\t"
@@ -1963,13 +1963,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
-                                    long width, long height, long src1Stride,
-                                    long src2Stride, long dstStride)
+                                    int width, int height, int src1Stride,
+                                    int src2Stride, int dstStride)
 {
-    long h;
+    int h;
 
     for (h=0; h < height; h++) {
-        long w;
+        int w;
 
 #if COMPILE_TEMPLATE_SSE2
         __asm__(
@@ -2037,12 +2037,12 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        uint8_t *dst1, uint8_t *dst2,
-                                       long width, long height,
-                                       long srcStride1, long srcStride2,
-                                       long dstStride1, long dstStride2)
+                                       int width, int height,
+                                       int srcStride1, int srcStride2,
+                                       int dstStride1, int dstStride2)
 {
     x86_reg y;
-    long x,w,h;
+    int x,w,h;
     w=width/2; h=height/2;
     __asm__ volatile(
         PREFETCH" %0    \n\t"
@@ -2131,12 +2131,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
 
 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                         uint8_t *dst,
-                                        long width, long height,
-                                        long srcStride1, long srcStride2,
-                                        long srcStride3, long dstStride)
+                                        int width, int height,
+                                        int srcStride1, int srcStride2,
+                                        int srcStride3, int dstStride)
 {
     x86_reg x;
-    long y,w,h;
+    int y,w,h;
     w=width/2; h=height;
     for (y=0;y<h;y++) {
         const uint8_t* yp=src1+srcStride1*y;
@@ -2197,7 +2197,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
                 :"memory");
         }
         for (; x<w; x++) {
-            const long x2 = x<<2;
+            const int x2 = x<<2;
             d[8*x+0] = yp[x2];
             d[8*x+1] = up[x];
             d[8*x+2] = yp[x2+1];
@@ -2459,11 +2459,11 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
 }
 
 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2485,11 +2485,11 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2509,11 +2509,11 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src+1, ydst, width);
@@ -2535,11 +2535,11 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src+1, ydst, width);
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index bb35693cf9..e2a530ae8d 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -78,7 +78,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                     const int16_t **chrVSrc,
                                     int chrFilterSize, const int16_t **alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -159,7 +159,7 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, long dstW, long chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -190,9 +190,9 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                                     const int16_t *chrUSrc, const int16_t *chrVSrc,
                                     const int16_t *alpSrc,
                                     uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
-    long p= 4;
+    int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
@@ -231,9 +231,9 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                        const int16_t *chrUSrc, const int16_t *chrVSrc,
                                        const int16_t *alpSrc,
                                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, long dstW, long chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW)
 {
-    long p= 4;
+    int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
@@ -471,7 +471,7 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
                                           const int16_t *chrFilter, const int16_t **chrUSrc,
                                           const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, long dstW, long dstY)
+                                          uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -504,7 +504,7 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, long dstW, long dstY)
+                                       uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -561,7 +561,7 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
                                            const int16_t *chrFilter, const int16_t **chrUSrc,
                                            const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, long dstW, long dstY)
+                                           uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -585,7 +585,7 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t *chrFilter, const int16_t **chrUSrc,
                                         const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, long dstW, long dstY)
+                                        uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -638,7 +638,7 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
                                            const int16_t *chrFilter, const int16_t **chrUSrc,
                                            const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, long dstW, long dstY)
+                                           uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -662,7 +662,7 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
                                         const int16_t *chrFilter, const int16_t **chrUSrc,
                                         const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, long dstW, long dstY)
+                                        uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -795,7 +795,7 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
                                           const int16_t *chrFilter, const int16_t **chrUSrc,
                                           const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, long dstW, long dstY)
+                                          uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -819,7 +819,7 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, long dstW, long dstY)
+                                       uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -860,7 +860,7 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
                                             const int16_t *chrFilter, const int16_t **chrUSrc,
                                             const int16_t **chrVSrc,
                                             int chrFilterSize, const int16_t **alpSrc,
-                                            uint8_t *dest, long dstW, long dstY)
+                                            uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -881,7 +881,7 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
                                          const int16_t *chrFilter, const int16_t **chrUSrc,
                                          const int16_t **chrVSrc,
                                          int chrFilterSize, const int16_t **alpSrc,
-                                         uint8_t *dest, long dstW, long dstY)
+                                         uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -897,16 +897,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
     YSCALEYUV2PACKEDX_END
 }
 
-#define REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB_UV(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -969,8 +969,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
 
 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 
-#define YSCALEYUV2RGB(index, c, uv_off) \
-    REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
+#define YSCALEYUV2RGB(index, c) \
+    REAL_YSCALEYUV2RGB_UV(index, c) \
     REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
     REAL_YSCALEYUV2RGB_COEFF(c)
 
@@ -984,12 +984,10 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
         __asm__ volatile(
-            YSCALEYUV2RGB(%%r8, %5, %8)
+            YSCALEYUV2RGB(%%r8, %5)
             YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
             "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
@@ -997,7 +995,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
                "a" (&c->redDither),
-               "r" (abuf0), "r" (abuf1), "m"(uv_off)
+               "r" (abuf0), "r" (abuf1)
             : "%r8"
         );
 #else
@@ -1007,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5, %6)
+            YSCALEYUV2RGB(%%REGBP, %5)
             "push                   %0              \n\t"
             "push                   %1              \n\t"
             "mov          "U_TEMP"(%5), %0          \n\t"
@@ -1022,7 +1020,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
 #endif
     } else {
@@ -1030,13 +1028,13 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5, %6)
+            YSCALEYUV2RGB(%%REGBP, %5)
             "pcmpeqd %%mm7, %%mm7                   \n\t"
             WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
@@ -1048,20 +1046,18 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
@@ -1072,14 +1068,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1091,7 +1085,7 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
@@ -1102,14 +1096,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov        %4, %%"REG_b"               \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1121,11 +1113,11 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
-#define REAL_YSCALEYUV2PACKED(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED(index, c) \
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
     "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
     "psraw                $3, %%mm0                           \n\t"\
@@ -1137,10 +1129,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -1163,7 +1155,7 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
     "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
     "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
 
-#define YSCALEYUV2PACKED(index, c, uv_off)  REAL_YSCALEYUV2PACKED(index, c, uv_off)
+#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
 
 static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
                                          const uint16_t *buf1, const uint16_t *ubuf0,
@@ -1172,30 +1164,28 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
                                          const uint16_t *abuf1, uint8_t *dest,
                                          int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
         "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
         "mov %4, %%"REG_b"                        \n\t"
         "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2PACKED(%%REGBP, %5, %6)
+        YSCALEYUV2PACKED(%%REGBP, %5)
         WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
         "pop %%"REG_BP"                         \n\t"
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
     );
 }
 
-#define REAL_YSCALEYUV2RGB1(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB1(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
     "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
     "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
@@ -1237,19 +1227,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1(index, c, uv_off)  REAL_YSCALEYUV2RGB1(index, c, uv_off)
+#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
 
 // do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB1b(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
@@ -1293,7 +1283,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
     "packuswb          %%mm6, %%mm5     \n\t"\
     "packuswb          %%mm3, %%mm4     \n\t"\
 
-#define YSCALEYUV2RGB1b(index, c, uv_off)  REAL_YSCALEYUV2RGB1b(index, c, uv_off)
+#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
 
 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
     "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
@@ -1313,7 +1303,6 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1322,26 +1311,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1(%%REGBP, %5)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1(%%REGBP, %5)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         }
     } else {
@@ -1350,26 +1339,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1b(%%REGBP, %5)
                 YSCALEYUV2RGB1_ALPHA(%%REGBP)
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         } else {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                 "mov        %4, %%"REG_b"               \n\t"
                 "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1b(%%REGBP, %5)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
                 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                 "pop %%"REG_BP"                         \n\t"
                 "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
             );
         }
     }
@@ -1382,7 +1371,6 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1390,26 +1378,26 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
@@ -1421,7 +1409,6 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1429,7 +1416,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1441,14 +1428,14 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1460,7 +1447,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
@@ -1472,7 +1459,6 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1480,7 +1466,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1492,14 +1478,14 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@@ -1511,19 +1497,19 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
 
-#define REAL_YSCALEYUV2PACKED1(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED1(index, c) \
     "xor            "#index", "#index"  \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psraw                $7, %%mm3     \n\t" \
     "psraw                $7, %%mm4     \n\t" \
     "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
@@ -1531,18 +1517,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t" \
 
-#define YSCALEYUV2PACKED1(index, c, uv_off)  REAL_YSCALEYUV2PACKED1(index, c, uv_off)
+#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
 
-#define REAL_YSCALEYUV2PACKED1b(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED1b(index, c) \
     "xor "#index", "#index"             \n\t"\
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $8, %%mm3     \n\t" \
@@ -1551,7 +1537,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
     "psraw                $7, %%mm1     \n\t" \
     "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c, uv_off)  REAL_YSCALEYUV2PACKED1b(index, c, uv_off)
+#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
 
 static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
                                          const uint16_t *ubuf0, const uint16_t *ubuf1,
@@ -1560,7 +1546,6 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
                                          int dstW, int uvalpha, enum PixelFormat dstFormat,
                                          int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@@ -1568,24 +1553,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1(%%REGBP, %5, %6)
+            YSCALEYUV2PACKED1(%%REGBP, %5)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     } else {
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
             "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1b(%%REGBP, %5, %6)
+            YSCALEYUV2PACKED1b(%%REGBP, %5)
             WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
             "pop %%"REG_BP"                         \n\t"
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
         );
     }
 }
@@ -1593,7 +1578,7 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
 #if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm2           \n\t"
@@ -1612,7 +1597,7 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width,
     );
 }
 
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1638,7 +1623,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                    %0, %%"REG_a"       \n\t"
@@ -1664,7 +1649,7 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                  %0, %%"REG_a"         \n\t"
@@ -1682,7 +1667,7 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width,
     );
 }
 
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1708,7 +1693,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1734,7 +1719,7 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
 }
 
 static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
-                                    const uint8_t *src, long width)
+                                    const uint8_t *src, int width)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1761,20 +1746,20 @@ static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
 
 static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
                                     const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
+                                    int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstU, dstV, src1, width);
 }
 
 static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
                                     const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
+                                    int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 
-static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat)
 {
 
     if(srcFormat == PIX_FMT_BGR24) {
@@ -1826,7 +1811,7 @@ static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long w
     );
 }
 
-static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat)
 {
     __asm__ volatile(
         "movq                    24(%4), %%mm6       \n\t"
@@ -1882,23 +1867,23 @@ static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uin
     );
 }
 
-static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
 }
 
-static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
     assert(src1 == src2);
 }
 
-static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
 }
 
-static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
     assert(src1==src2);
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@@ -1907,7 +1892,7 @@ static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t
 #if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
-                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
+                                  const int16_t *filter, const int16_t *filterPos, int filterSize)
 {
     assert(filterSize % 4 == 0 && filterSize>0);
     if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
@@ -2220,7 +2205,7 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src,
 
 #if COMPILE_TEMPLATE_MMX2
 static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                        long dstWidth, const uint8_t *src, int srcW,
+                                        int dstWidth, const uint8_t *src, int srcW,
                                         int xInc)
 {
     int32_t *filterPos = c->hLumFilterPos;
@@ -2292,7 +2277,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 }
 
 static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                        long dstWidth, const uint8_t *src1,
+                                        int dstWidth, const uint8_t *src1,
                                         const uint8_t *src2, int srcW, int xInc)
 {
     int32_t *filterPos = c->hChrFilterPos;
author	Michael Niedermayer <michaelni@gmx.at>	2011-05-29 02:55:19 +0200
committer	Michael Niedermayer <michaelni@gmx.at>	2011-05-29 03:34:35 +0200
commit	b8a43bc1b50f409414493a05f6c4b7895ca4ddf9 (patch)
tree	95dda1b7289aac9bdb1f457417baf9515aa4383a
parent	39d607e5bbc25ad9629683702b510e865434ef21 (diff)
parent	90da52f01f8b6c22af22a002eb226989b1cf7ef8 (diff)
download	ffmpeg-b8a43bc1b50f409414493a05f6c4b7895ca4ddf9.tar.gz