88 files changed, 864 insertions, 606 deletions
diff --git a/Makefile b/Makefile
index 498a0cf248..9eff02dbed 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,49 @@
 include config.mak
 
-SRC_DIR = $(SRC_PATH_BARE)
+vpath %.c    $(SRC_PATH)
+vpath %.h    $(SRC_PATH)
+vpath %.S    $(SRC_PATH)
+vpath %.asm  $(SRC_PATH)
+vpath %.v    $(SRC_PATH)
+vpath %.texi $(SRC_PATH)
+
+ifndef V
+Q      = @
+ECHO   = printf "$(1)\t%s\n" $(2)
+BRIEF  = CC AS YASM AR LD HOSTCC STRIP CP
+SILENT = DEPCC YASMDEP RM RANLIB
+MSG    = $@
+M      = @$(call ECHO,$(TAG),$@);
+$(foreach VAR,$(BRIEF), \
+    $(eval override $(VAR) = @$$(call ECHO,$(VAR),$$(MSG)); $($(VAR))))
+$(foreach VAR,$(SILENT),$(eval override $(VAR) = @$($(VAR))))
+$(eval INSTALL = @$(call ECHO,INSTALL,$$(^:$(SRC_PATH)/%=%)); $(INSTALL))
+endif
+
+IFLAGS     := -I. -I$(SRC_PATH)
+CPPFLAGS   := $(IFLAGS) $(CPPFLAGS)
+CFLAGS     += $(ECFLAGS)
+YASMFLAGS  += $(IFLAGS) -Pconfig.asm
+HOSTCFLAGS += $(IFLAGS)
+
+%.o: %.c
+	$(CCDEP)
+	$(CC) $(CPPFLAGS) $(CFLAGS) $(CC_DEPFLAGS) -c $(CC_O) $<
+
+%.o: %.S
+	$(ASDEP)
+	$(AS) $(CPPFLAGS) $(ASFLAGS) $(AS_DEPFLAGS) -c -o $@ $<
+
+%.ho: %.h
+	$(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused -c -o $@ -x c $<
 
-vpath %.c   $(SRC_DIR)
-vpath %.h   $(SRC_DIR)
-vpath %.S   $(SRC_DIR)
-vpath %.asm $(SRC_DIR)
-vpath %.v   $(SRC_DIR)
-vpath %.texi $(SRC_PATH_BARE)
+%.ver: %.v
+	$(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@
+
+%.c %.h: TAG = GEN
+
+# Do not delete intermediate files from chains of implicit rules
+$(OBJS):
 
 PROGS-$(CONFIG_FFMPEG)   += ffmpeg
 PROGS-$(CONFIG_FFPLAY)   += ffplay
@@ -37,7 +73,7 @@ FFLIBS-$(CONFIG_SWSCALE)  += swscale
 
 FFLIBS := avutil
 
-DATA_FILES := $(wildcard $(SRC_DIR)/ffpresets/*.ffpreset)
+DATA_FILES := $(wildcard $(SRC_PATH)/ffpresets/*.ffpreset)
 
 SKIPHEADERS = cmdutils_common_opts.h
 
@@ -54,7 +90,7 @@ $(PROGS): %$(EXESUF): %_g$(EXESUF)
 	$(STRIP) $@
 
 config.h: .config
-.config: $(wildcard $(FFLIBS:%=$(SRC_DIR)/lib%/all*.c))
+.config: $(wildcard $(FFLIBS:%=$(SRC_PATH)/lib%/all*.c))
 	@-tput bold 2>/dev/null
 	@-printf '\nWARNING: $(?F) newer than config.h, rerun configure\n\n'
 	@-tput sgr0 2>/dev/null
@@ -93,8 +129,8 @@ tools/%.o: tools/%.c
 
 -include $(wildcard tools/*.d)
 
-VERSION_SH  = $(SRC_PATH_BARE)/version.sh
-GIT_LOG     = $(SRC_PATH_BARE)/.git/logs/HEAD
+VERSION_SH  = $(SRC_PATH)/version.sh
+GIT_LOG     = $(SRC_PATH)/.git/logs/HEAD
 
 .version: $(wildcard $(GIT_LOG)) $(VERSION_SH) config.mak
 .version: M=@
@@ -151,5 +187,13 @@ check: test
 include doc/Makefile
 include tests/Makefile
 
+# Dummy rule to stop make trying to rebuild removed or renamed headers
+%.h:
+	@:
+
+# Disable suffix rules.  Most of the builtin rules are suffix rules,
+# so this saves some time on slow systems.
+.SUFFIXES:
+
 .PHONY: all alltools *clean check config examples install*
 .PHONY: testprogs uninstall*
diff --git a/common.mak b/common.mak
index a293987272..e2eaa399da 100644
--- a/common.mak
+++ b/common.mak
@@ -5,56 +5,6 @@
 # first so "all" becomes default target
 all: all-yes
 
-ifndef SUBDIR
-
-ifndef V
-Q      = @
-ECHO   = printf "$(1)\t%s\n" $(2)
-BRIEF  = CC AS YASM AR LD HOSTCC STRIP CP
-SILENT = DEPCC YASMDEP RM RANLIB
-MSG    = $@
-M      = @$(call ECHO,$(TAG),$@);
-$(foreach VAR,$(BRIEF), \
-    $(eval override $(VAR) = @$$(call ECHO,$(VAR),$$(MSG)); $($(VAR))))
-$(foreach VAR,$(SILENT),$(eval override $(VAR) = @$($(VAR))))
-$(eval INSTALL = @$(call ECHO,INSTALL,$$(^:$(SRC_DIR)/%=%)); $(INSTALL))
-endif
-
-IFLAGS   := -I. -I$(SRC_PATH)
-CPPFLAGS := $(IFLAGS) $(CPPFLAGS)
-CFLAGS   += $(ECFLAGS)
-YASMFLAGS += $(IFLAGS) -Pconfig.asm
-
-HOSTCFLAGS += $(IFLAGS)
-
-%.o: %.c
-	$(CCDEP)
-	$(CC) $(CPPFLAGS) $(CFLAGS) $(CC_DEPFLAGS) -c $(CC_O) $<
-
-%.o: %.S
-	$(ASDEP)
-	$(AS) $(CPPFLAGS) $(ASFLAGS) $(AS_DEPFLAGS) -c -o $@ $<
-
-%.ho: %.h
-	$(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused -c -o $@ -x c $<
-
-%.ver: %.v
-	$(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@
-
-%.c %.h: TAG = GEN
-
-# Dummy rule to stop make trying to rebuild removed or renamed headers
-%.h:
-	@:
-
-# Disable suffix rules.  Most of the builtin rules are suffix rules,
-# so this saves some time on slow systems.
-.SUFFIXES:
-
-# Do not delete intermediate files from chains of implicit rules
-$(OBJS):
-endif
-
 OBJS-$(HAVE_MMX) +=  $(MMX-OBJS-yes)
 
 OBJS      += $(OBJS-yes)
diff --git a/configure b/configure
index aa62831f4f..7d4cd1417a 100755
--- a/configure
+++ b/configure
@@ -971,6 +971,7 @@ CONFIG_LIST="
     static
     swscale
     swscale_alpha
+    thumb
     vaapi
     vdpau
     version3
@@ -1717,7 +1718,7 @@ DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -MM'
 
 # find source path
 if test -f configure; then
-    source_path="$(pwd)"
+    source_path=.  # configure is in the current directory: refer to the sources by a relative path
     disable source_path_used
 else
     source_path=$(cd $(dirname "$0"); pwd)
@@ -2300,7 +2301,7 @@ elif enabled arm; then
             case $cpu in
                 cortex-a*)                               subarch=armv7a  ;;
                 cortex-r*)                               subarch=armv7r  ;;
-                cortex-m*)                               subarch=armv7m  ;;
+                cortex-m*)                 enable thumb; subarch=armv7m  ;;
                 arm11*)                                  subarch=armv6   ;;
                 arm[79]*e*|arm9[24]6*|arm96*|arm102[26]) subarch=armv5te ;;
                 armv4*|arm7*|arm9[24]*)                  subarch=armv4   ;;
@@ -2640,7 +2641,7 @@ if enabled alpha; then
 
 elif enabled arm; then
 
-    check_cflags -marm
+    enabled thumb && check_cflags -mthumb || check_cflags -marm  # -marm is the fallback: it runs when thumb is disabled or when the -mthumb check fails
     nogas=die
 
     if     check_cpp_condition stddef.h "defined __ARM_PCS_VFP"; then
@@ -3325,8 +3326,7 @@ INCDIR=\$(DESTDIR)$incdir
 BINDIR=\$(DESTDIR)$bindir
 DATADIR=\$(DESTDIR)$datadir
 MANDIR=\$(DESTDIR)$mandir
-SRC_PATH="$source_path"
-SRC_PATH_BARE=$source_path
+SRC_PATH=$source_path
 CC_IDENT=$cc_ident
 ARCH=$arch
 CC=$cc
diff --git a/doc/APIchanges b/doc/APIchanges
index ec76a7bb2a..2fd79b877a 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -43,6 +43,9 @@ API changes, most recent first:
 2011-06-12 - xxxxxxx - lavfi 2.16.0 - avfilter_graph_parse()
   Change avfilter_graph_parse() signature.
 
+2011-06-xx - xxxxxxx - lavu 51.8.0 - attributes.h
+  Add av_printf_format().
+
 2011-06-xx - xxxxxxx - lavf 53.2.0 - avformat.h
   Add avformat_open_input and avformat_write_header().
   Deprecate av_open_input_stream, av_open_input_file,
diff --git a/doc/Makefile b/doc/Makefile
index a5e090b0cc..558277ad0c 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -11,9 +11,9 @@ documentation: $(DOCS)
 TEXIDEP = awk '/^@include/ { printf "$@: $(@D)/%s\n", $$2 }' <$< >$(@:%=%.d)
 
 doc/%.html: TAG = HTML
-doc/%.html: doc/%.texi $(SRC_PATH_BARE)/doc/t2h.init
+doc/%.html: doc/%.texi $(SRC_PATH)/doc/t2h.init
 	$(Q)$(TEXIDEP)
-	$(M)texi2html -monolithic --init-file $(SRC_PATH_BARE)/doc/t2h.init --output $@ $<
+	$(M)texi2html -monolithic --init-file $(SRC_PATH)/doc/t2h.init --output $@ $<
 
 doc/%.pod: TAG = POD
 doc/%.pod: doc/%.texi
diff --git a/libavcodec/acelp_pitch_delay.h b/libavcodec/acelp_pitch_delay.h
index ce06bc2539..72977f1f49 100644
--- a/libavcodec/acelp_pitch_delay.h
+++ b/libavcodec/acelp_pitch_delay.h
@@ -30,11 +30,11 @@
 #define PITCH_DELAY_MAX             143
 
 /**
- * \brief Decode pitch delay of the first subframe encoded by 8 bits with 1/3
+ * @brief Decode pitch delay of the first subframe encoded by 8 bits with 1/3
  *        resolution.
- * \param ac_index adaptive codebook index (8 bits)
+ * @param ac_index adaptive codebook index (8 bits)
  *
- * \return pitch delay in 1/3 units
+ * @return pitch delay in 1/3 units
  *
  * Pitch delay is coded:
  *    with 1/3 resolution, 19  < pitch_delay <  85
@@ -43,18 +43,18 @@
 int ff_acelp_decode_8bit_to_1st_delay3(int ac_index);
 
 /**
- * \brief Decode pitch delay of the second subframe encoded by 5 or 6 bits
+ * @brief Decode pitch delay of the second subframe encoded by 5 or 6 bits
  *        with 1/3 precision.
- * \param ac_index adaptive codebook index (5 or 6 bits)
- * \param pitch_delay_min lower bound (integer) of pitch delay interval
+ * @param ac_index adaptive codebook index (5 or 6 bits)
+ * @param pitch_delay_min lower bound (integer) of pitch delay interval
  *                      for second subframe
  *
- * \return pitch delay in 1/3 units
+ * @return pitch delay in 1/3 units
  *
  * Pitch delay is coded:
  *    with 1/3 resolution, -6 < pitch_delay - int(prev_pitch_delay) < 5
  *
- * \remark The routine is used in G.729 @@8k, AMR @@10.2k, AMR @@7.95k,
+ * @remark The routine is used in G.729 @@8k, AMR @@10.2k, AMR @@7.95k,
  *         AMR @@7.4k for the second subframe.
  */
 int ff_acelp_decode_5_6_bit_to_2nd_delay3(
@@ -62,19 +62,19 @@ int ff_acelp_decode_5_6_bit_to_2nd_delay3(
         int pitch_delay_min);
 
 /**
- * \brief Decode pitch delay with 1/3 precision.
- * \param ac_index adaptive codebook index (4 bits)
- * \param pitch_delay_min lower bound (integer) of pitch delay interval for
+ * @brief Decode pitch delay with 1/3 precision.
+ * @param ac_index adaptive codebook index (4 bits)
+ * @param pitch_delay_min lower bound (integer) of pitch delay interval for
  *                      second subframe
  *
- * \return pitch delay in 1/3 units
+ * @return pitch delay in 1/3 units
  *
  * Pitch delay is coded:
  *    integers only,          -6  < pitch_delay - int(prev_pitch_delay) <= -2
  *    with 1/3 resolution,    -2  < pitch_delay - int(prev_pitch_delay) <  1
  *    integers only,           1 <= pitch_delay - int(prev_pitch_delay) <  5
  *
- * \remark The routine is used in G.729 @@6.4k, AMR @@6.7k, AMR @@5.9k,
+ * @remark The routine is used in G.729 @@6.4k, AMR @@6.7k, AMR @@5.9k,
  *         AMR @@5.15k, AMR @@4.75k for the second subframe.
  */
 int ff_acelp_decode_4bit_to_2nd_delay3(
@@ -82,44 +82,44 @@ int ff_acelp_decode_4bit_to_2nd_delay3(
         int pitch_delay_min);
 
 /**
- * \brief Decode pitch delay of the first subframe encoded by 9 bits
+ * @brief Decode pitch delay of the first subframe encoded by 9 bits
  *        with 1/6 precision.
- * \param ac_index adaptive codebook index (9 bits)
+ * @param ac_index adaptive codebook index (9 bits)
  *
- * \return pitch delay in 1/6 units
+ * @return pitch delay in 1/6 units
  *
  * Pitch delay is coded:
  *    with 1/6 resolution,  17  < pitch_delay <  95
  *    integers only,        95 <= pitch_delay <= 143
  *
- * \remark The routine is used in AMR @@12.2k for the first and third subframes.
+ * @remark The routine is used in AMR @@12.2k for the first and third subframes.
  */
 int ff_acelp_decode_9bit_to_1st_delay6(int ac_index);
 
 /**
- * \brief Decode pitch delay of the second subframe encoded by 6 bits
+ * @brief Decode pitch delay of the second subframe encoded by 6 bits
  *        with 1/6 precision.
- * \param ac_index adaptive codebook index (6 bits)
- * \param pitch_delay_min lower bound (integer) of pitch delay interval for
+ * @param ac_index adaptive codebook index (6 bits)
+ * @param pitch_delay_min lower bound (integer) of pitch delay interval for
  *                      second subframe
  *
- * \return pitch delay in 1/6 units
+ * @return pitch delay in 1/6 units
  *
  * Pitch delay is coded:
  *    with 1/6 resolution, -6 < pitch_delay - int(prev_pitch_delay) < 5
  *
- * \remark The routine is used in AMR @@12.2k for the second and fourth subframes.
+ * @remark The routine is used in AMR @@12.2k for the second and fourth subframes.
  */
 int ff_acelp_decode_6bit_to_2nd_delay6(
         int ac_index,
         int pitch_delay_min);
 
 /**
- * \brief Update past quantized energies
- * \param[in,out]  quant_energy  past quantized energies (5.10)
- * \param gain_corr_factor gain correction factor
- * \param log2_ma_pred_order log2() of MA prediction order
- * \param erasure frame erasure flag
+ * @brief Update past quantized energies
+ * @param[in,out]  quant_energy  past quantized energies (5.10)
+ * @param gain_corr_factor gain correction factor
+ * @param log2_ma_pred_order log2() of MA prediction order
+ * @param erasure frame erasure flag
  *
  * If frame erasure flag is not equal to zero, memory is updated with
  * averaged energy, attenuated by 4dB:
@@ -128,7 +128,7 @@ int ff_acelp_decode_6bit_to_2nd_delay6(
  * In normal mode memory is updated with
  *     Er - Ep = 20 * log10(gain_corr_factor)
  *
- * \remark The routine is used in G.729 and AMR (all modes).
+ * @remark The routine is used in G.729 and AMR (all modes).
  */
 void ff_acelp_update_past_gain(
         int16_t* quant_energy,
@@ -137,16 +137,16 @@ void ff_acelp_update_past_gain(
         int erasure);
 
 /**
- * \brief Decode the adaptive codebook gain and add
+ * @brief Decode the adaptive codebook gain and add
  *        correction (4.1.5 and 3.9.1 of G.729).
- * \param dsp initialized dsputil context
- * \param gain_corr_factor gain correction factor (2.13)
- * \param fc_v fixed-codebook vector (2.13)
- * \param mr_energy mean innovation energy and fixed-point correction (7.13)
- * \param[in,out]  quant_energy  past quantized energies (5.10)
- * \param subframe_size length of subframe
+ * @param dsp initialized dsputil context
+ * @param gain_corr_factor gain correction factor (2.13)
+ * @param fc_v fixed-codebook vector (2.13)
+ * @param mr_energy mean innovation energy and fixed-point correction (7.13)
+ * @param[in,out]  quant_energy  past quantized energies (5.10)
+ * @param subframe_size length of subframe
  *
- * \return quantized fixed-codebook gain (14.1)
+ * @return quantized fixed-codebook gain (14.1)
  *
  * The routine implements equations 69, 66 and 71 of the G.729 specification (3.9.1)
  *
@@ -205,7 +205,7 @@ void ff_acelp_update_past_gain(
  *
  *        mr_energy = Em + 10log(N) + 10log(2^26)
  *
- * \remark The routine is used in G.729 and AMR (all modes).
+ * @remark The routine is used in G.729 and AMR (all modes).
  */
 int16_t ff_acelp_decode_gain_code(
     DSPContext *dsp,
diff --git a/libavcodec/arm/aac.h b/libavcodec/arm/aac.h
index 3b14c094c6..bd4d293f02 100644
--- a/libavcodec/arm/aac.h
+++ b/libavcodec/arm/aac.h
@@ -114,12 +114,15 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
              "vmov     d1,  %2,  %3          \n\t"
              "lsls     %6,  %6,  #1          \n\t"
              "and      %0,  %5,  #1<<31      \n\t"
+             "it       cs                    \n\t"
              "lslcs    %5,  %5,  #1          \n\t"
              "lsls     %6,  %6,  #1          \n\t"
              "and      %1,  %5,  #1<<31      \n\t"
+             "it       cs                    \n\t"
              "lslcs    %5,  %5,  #1          \n\t"
              "lsls     %6,  %6,  #1          \n\t"
              "and      %2,  %5,  #1<<31      \n\t"
+             "it       cs                    \n\t"
              "lslcs    %5,  %5,  #1          \n\t"
              "vmov     d4,  %0,  %1          \n\t"
              "and      %3,  %5,  #1<<31      \n\t"
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
index 545714cff1..9a7d20eb7b 100644
--- a/libavcodec/arm/ac3dsp_arm.S
+++ b/libavcodec/arm/ac3dsp_arm.S
@@ -27,6 +27,7 @@ function ff_ac3_update_bap_counts_arm, export=1
         lsl             r3,  lr,  #1
         ldrh            r12, [r0, r3]
         subs            r2,  r2,  #1
+        it              gt
         ldrbgt          lr,  [r1], #1
         add             r12, r12, #1
         strh            r12, [r0, r3]
diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S
index 2b2f2acf22..615baf94e0 100644
--- a/libavcodec/arm/ac3dsp_armv6.S
+++ b/libavcodec/arm/ac3dsp_armv6.S
@@ -42,9 +42,11 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1
         mov             r11, r10
         ldrb            r10, [r4], #1                   @ band_start_tab[band++]
         subs            r9,  r9,  r5                    @   - floor
+        it              lt
         movlt           r9,  #0
         cmp             r10, r3                         @   - end
         and             r9,  r9,  r8                    @   & 0x1fe0
+        ite             gt
         subgt           r8,  r3,  r11
         suble           r8,  r10, r11
         add             r9,  r9,  r5                    @   + floor => m
diff --git a/libavcodec/arm/ac3dsp_neon.S b/libavcodec/arm/ac3dsp_neon.S
index 946b39f25b..fdf1deabc9 100644
--- a/libavcodec/arm/ac3dsp_neon.S
+++ b/libavcodec/arm/ac3dsp_neon.S
@@ -41,6 +41,7 @@ endfunc
 
 function ff_ac3_exponent_min_neon, export=1
         cmp             r1,  #0
+        it              eq
         bxeq            lr
         push            {lr}
         mov             r12, #256
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S
index bb999fd61a..fc7ee60357 100644
--- a/libavcodec/arm/asm.S
+++ b/libavcodec/arm/asm.S
@@ -26,7 +26,16 @@
 #   define ELF @
 #endif
 
+#if CONFIG_THUMB  /* choose which of the A-/T-prefixed source lines get assembled */
+#   define A @  /* prefix becomes the assembler comment character: A-lines are dropped */
+#   define T  /* prefix expands to nothing: T-lines are assembled */
+#else  /* CONFIG_THUMB is off */
+#   define A  /* prefix expands to nothing: A-lines are assembled */
+#   define T @  /* prefix becomes the assembler comment character: T-lines are dropped */
+#endif  /* CONFIG_THUMB */
+
         .syntax unified
+T       .thumb
 
 .macro  require8 val=1
 ELF     .eabi_attribute 24, \val
@@ -82,6 +91,90 @@ ELF     .size   \name, . - \name
 #endif
 .endm
 
+.macro  ldr_pre         rt,  rn,  rm:vararg  /* rn += rm, then rt = word at [rn] */
+A       ldr             \rt, [\rn, \rm]!  /* CONFIG_THUMB off: one pre-indexed load with base writeback */
+T       add             \rn, \rn, \rm  /* CONFIG_THUMB on: advance the base register first ... */
+T       ldr             \rt, [\rn]  /* ... then load through the updated base */
+.endm
+
+.macro  ldr_post        rt,  rn,  rm:vararg  /* rt = word at [rn], then rn += rm */
+A       ldr             \rt, [\rn], \rm  /* CONFIG_THUMB off: one post-indexed load */
+T       ldr             \rt, [\rn]  /* CONFIG_THUMB on: load from the current base ... */
+T       add             \rn, \rn, \rm  /* ... then advance the base register */
+.endm
+
+.macro  ldrd_reg        rt,  rt2, rn,  rm  /* rt, rt2 = register pair loaded from [rn + rm]; no base writeback */
+A       ldrd            \rt, \rt2, [\rn, \rm]  /* CONFIG_THUMB off: one load with a register offset */
+T       add             \rt, \rn, \rm  /* CONFIG_THUMB on: form the address in rt, used as scratch ... */
+T       ldrd            \rt, \rt2, [\rt]  /* ... then load the pair through it, replacing the scratch value */
+.endm
+
+.macro  ldrd_post       rt,  rt2, rn,  rm  /* rt, rt2 = register pair loaded from [rn], then rn += rm */
+A       ldrd            \rt, \rt2, [\rn], \rm  /* CONFIG_THUMB off: one post-indexed load */
+T       ldrd            \rt, \rt2, [\rn]  /* CONFIG_THUMB on: load from the current base ... */
+T       add             \rn, \rn, \rm  /* ... then advance the base register */
+.endm
+
+.macro  ldrh_pre        rt,  rn,  rm  /* rn += rm, then rt = halfword at [rn] */
+A       ldrh            \rt, [\rn, \rm]!  /* CONFIG_THUMB off: one pre-indexed load with base writeback */
+T       add             \rn, \rn, \rm  /* CONFIG_THUMB on: advance the base register first ... */
+T       ldrh            \rt, [\rn]  /* ... then load through the updated base */
+.endm
+
+.macro  ldrh_dpre       rt,  rn,  rm  /* rn -= rm, then rt = halfword at [rn] */
+A       ldrh            \rt, [\rn, -\rm]!  /* CONFIG_THUMB off: one pre-indexed load with a negated offset and writeback */
+T       sub             \rn, \rn, \rm  /* CONFIG_THUMB on: step the base register back first ... */
+T       ldrh            \rt, [\rn]  /* ... then load through the updated base */
+.endm
+
+.macro  ldrh_post       rt,  rn,  rm  /* rt = halfword at [rn], then rn += rm */
+A       ldrh            \rt, [\rn], \rm  /* CONFIG_THUMB off: one post-indexed load */
+T       ldrh            \rt, [\rn]  /* CONFIG_THUMB on: load from the current base ... */
+T       add             \rn, \rn, \rm  /* ... then advance the base register */
+.endm
+
+.macro  str_post       rt,  rn,  rm:vararg  /* store word rt at [rn], then rn += rm */
+A       str             \rt, [\rn], \rm  /* CONFIG_THUMB off: one post-indexed store */
+T       str             \rt, [\rn]  /* CONFIG_THUMB on: store at the current base ... */
+T       add             \rn, \rn, \rm  /* ... then advance the base register */
+.endm
+
+.macro  strb_post       rt,  rn,  rm:vararg  /* store byte from rt at [rn], then rn += rm */
+A       strb            \rt, [\rn], \rm  /* CONFIG_THUMB off: one post-indexed store */
+T       strb            \rt, [\rn]  /* CONFIG_THUMB on: store at the current base ... */
+T       add             \rn, \rn, \rm  /* ... then advance the base register */
+.endm
+
+.macro  strd_post       rt,  rt2, rn,  rm  /* store register pair rt, rt2 at [rn], then rn += rm */
+A       strd            \rt, \rt2, [\rn], \rm  /* CONFIG_THUMB off: one post-indexed store */
+T       strd            \rt, \rt2, [\rn]  /* CONFIG_THUMB on: store at the current base ... */
+T       add             \rn, \rn, \rm  /* ... then advance the base register */
+.endm
+
+.macro  strh_pre        rt,  rn,  rm  /* rn += rm, then store halfword from rt at [rn] */
+A       strh            \rt, [\rn, \rm]!  /* CONFIG_THUMB off: one pre-indexed store with base writeback */
+T       add             \rn, \rn, \rm  /* CONFIG_THUMB on: advance the base register first ... */
+T       strh            \rt, [\rn]  /* ... then store through the updated base */
+.endm
+
+.macro  strh_dpre       rt,  rn,  rm  /* rn -= rm, then store halfword from rt at [rn] */
+A       strh            \rt, [\rn, -\rm]!  /* CONFIG_THUMB off: one pre-indexed store with a negated offset and writeback */
+T       sub             \rn, \rn, \rm  /* CONFIG_THUMB on: step the base register back first ... */
+T       strh            \rt, [\rn]  /* ... then store through the updated base */
+.endm
+
+.macro  strh_post       rt,  rn,  rm  /* store halfword from rt at [rn], then rn += rm */
+A       strh            \rt, [\rn], \rm  /* CONFIG_THUMB off: one post-indexed store */
+T       strh            \rt, [\rn]  /* CONFIG_THUMB on: store at the current base ... */
+T       add             \rn, \rn, \rm  /* ... then advance the base register */
+.endm
+
+.macro  strh_dpost       rt,  rn,  rm  /* store halfword from rt at [rn], then rn -= rm */
+A       strh            \rt, [\rn], -\rm  /* CONFIG_THUMB off: one post-indexed store with a negated offset */
+T       strh            \rt, [\rn]  /* CONFIG_THUMB on: store at the current base ... */
+T       sub             \rn, \rn, \rm  /* ... then step the base register back */
+.endm
+
 #if HAVE_VFP_ARGS
         .eabi_attribute 28, 1
 #   define VFP
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index c3bddd3e41..852527a59e 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -27,6 +27,7 @@ function ff_dca_lfe_fir_neon, export=1
         add             r5,  r2,  #256*4-16     @ cf1
         sub             r1,  r1,  #12
         cmp             r3,  #32
+        ite             eq
         moveq           r6,  #256/32
         movne           r6,  #256/64
 NOVFP   vldr            s0,  [sp, #16]          @ scale
diff --git a/libavcodec/arm/dsputil_arm.S b/libavcodec/arm/dsputil_arm.S
index 7ee85e808b..c614206bac 100644
--- a/libavcodec/arm/dsputil_arm.S
+++ b/libavcodec/arm/dsputil_arm.S
@@ -554,10 +554,12 @@ endfunc
         and             r9,  r5,  r14
         and             r10, r6,  r14
         and             r11, r7,  r14
+        it              eq
         andeq           r14, r14, r14, \rnd #1
         add             r8,  r8,  r10
         add             r9,  r9,  r11
         ldr             r12, =0xfcfcfcfc >> 2
+        itt             eq
         addeq           r8,  r8,  r14
         addeq           r9,  r9,  r14
         and             r4,  r12, r4,  lsr #2
@@ -638,8 +640,10 @@ function ff_add_pixels_clamped_arm, export=1
         mvn             r5,  r5
         mvn             r7,  r7
         tst             r6,  #0x100
+        it              ne
         movne           r6,  r5,  lsr #24
         tst             r8,  #0x100
+        it              ne
         movne           r8,  r7,  lsr #24
         mov             r9,  r6
         ldrsh           r5,  [r0, #4]           /* moved form [A] */
@@ -654,8 +658,10 @@ function ff_add_pixels_clamped_arm, export=1
         mvn             r5,  r5
         mvn             r7,  r7
         tst             r6,  #0x100
+        it              ne
         movne           r6,  r5,  lsr #24
         tst             r8,  #0x100
+        it              ne
         movne           r8,  r7,  lsr #24
         orr             r9,  r9,  r6,  lsl #16
         ldr             r4,  [r1, #4]           /* moved form [B] */
@@ -676,8 +682,10 @@ function ff_add_pixels_clamped_arm, export=1
         mvn             r5,  r5
         mvn             r7,  r7
         tst             r6,  #0x100
+        it              ne
         movne           r6,  r5,  lsr #24
         tst             r8,  #0x100
+        it              ne
         movne           r8,  r7,  lsr #24
         mov             r9,  r6
         ldrsh           r5,  [r0, #12]          /* moved from [D] */
@@ -692,8 +700,10 @@ function ff_add_pixels_clamped_arm, export=1
         mvn             r5,  r5
         mvn             r7,  r7
         tst             r6,  #0x100
+        it              ne
         movne           r6,  r5,  lsr #24
         tst             r8,  #0x100
+        it              ne
         movne           r8,  r7,  lsr #24
         orr             r9,  r9,  r6,  lsl #16
         add             r0,  r0,  #16           /* moved from [E] */
diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S
index 214d947da3..a2c8588fad 100644
--- a/libavcodec/arm/dsputil_armv6.S
+++ b/libavcodec/arm/dsputil_armv6.S
@@ -47,16 +47,16 @@ function ff_put_pixels16_armv6, export=1
         ldr             r5,  [r1, #4]
         ldr             r6,  [r1, #8]
         ldr             r7,  [r1, #12]
-        ldr             r4,  [r1], r2
+        ldr_post        r4,  r1,  r2
         strd            r6,  r7,  [r0, #8]
         ldr             r9,  [r1, #4]
-        strd            r4,  r5,  [r0],  r2
+        strd_post       r4,  r5,  r0,  r2
         ldr             r10, [r1, #8]
         ldr             r11, [r1, #12]
-        ldr             r8,  [r1], r2
+        ldr_post        r8,  r1,  r2
         strd            r10, r11, [r0, #8]
         subs            r3,  r3,  #2
-        strd            r8,  r9,  [r0],  r2
+        strd_post       r8,  r9,  r0,  r2
         bne             1b
 
         pop             {r4-r11}
@@ -67,12 +67,12 @@ function ff_put_pixels8_armv6, export=1
         push            {r4-r7}
 1:
         ldr             r5,  [r1, #4]
-        ldr             r4,  [r1], r2
+        ldr_post        r4,  r1,  r2
         ldr             r7,  [r1, #4]
-        strd            r4,  r5,  [r0],  r2
-        ldr             r6,  [r1], r2
+        strd_post       r4,  r5,  r0,  r2
+        ldr_post        r6,  r1,  r2
         subs            r3,  r3,  #2
-        strd            r6,  r7,  [r0],  r2
+        strd_post       r6,  r7,  r0,  r2
         bne             1b
 
         pop             {r4-r7}
@@ -90,7 +90,7 @@ function ff_put_pixels8_x2_armv6, export=1
         ldr             r5,  [r1, #4]
         ldr             r7,  [r1, #5]
         lsr             r6,  r4,  #8
-        ldr             r8,  [r1, r2]!
+        ldr_pre         r8,  r1,  r2
         orr             r6,  r6,  r5,  lsl #24
         ldr             r9,  [r1, #4]
         ldr             r11, [r1, #5]
@@ -112,9 +112,9 @@ function ff_put_pixels8_x2_armv6, export=1
         uhadd8          r9,  r9,  r11
         and             r6,  r6,  r12
         uadd8           r8,  r8,  r14
-        strd            r4,  r5,  [r0],  r2
+        strd_post       r4,  r5,  r0,  r2
         uadd8           r9,  r9,  r6
-        strd            r8,  r9,  [r0],  r2
+        strd_post       r8,  r9,  r0,  r2
         bne             1b
 
         pop             {r4-r11, pc}
@@ -127,7 +127,7 @@ function ff_put_pixels8_y2_armv6, export=1
         orr             r12, r12, r12, lsl #16
         ldr             r4,  [r1]
         ldr             r5,  [r1, #4]
-        ldr             r6,  [r1, r2]!
+        ldr_pre         r6,  r1,  r2
         ldr             r7,  [r1, #4]
 1:
         subs            r3,  r3,  #2
@@ -136,7 +136,7 @@ function ff_put_pixels8_y2_armv6, export=1
         uhadd8          r9,  r5,  r7
         eor             r11, r5,  r7
         and             r10, r10, r12
-        ldr             r4,  [r1, r2]!
+        ldr_pre         r4,  r1,  r2
         uadd8           r8,  r8,  r10
         and             r11, r11, r12
         uadd8           r9,  r9,  r11
@@ -148,11 +148,11 @@ function ff_put_pixels8_y2_armv6, export=1
         eor             r7,  r5,  r7
         uadd8           r10, r10, r6
         and             r7,  r7,  r12
-        ldr             r6,  [r1, r2]!
+        ldr_pre         r6,  r1,  r2
         uadd8           r11, r11, r7
-        strd            r8,  r9,  [r0],  r2
+        strd_post       r8,  r9,  r0,  r2
         ldr             r7,  [r1, #4]
-        strd            r10, r11, [r0],  r2
+        strd_post       r10, r11, r0,  r2
         bne             1b
 
         pop             {r4-r11}
@@ -166,7 +166,7 @@ function ff_put_pixels8_x2_no_rnd_armv6, export=1
         ldr             r4,  [r1]
         ldr             r5,  [r1, #4]
         ldr             r7,  [r1, #5]
-        ldr             r8,  [r1, r2]!
+        ldr_pre         r8,  r1,  r2
         ldr             r9,  [r1, #4]
         ldr             r14, [r1, #5]
         add             r1,  r1,  r2
@@ -191,16 +191,16 @@ function ff_put_pixels8_y2_no_rnd_armv6, export=1
         push            {r4-r9, lr}
         ldr             r4,  [r1]
         ldr             r5,  [r1, #4]
-        ldr             r6,  [r1, r2]!
+        ldr_pre         r6,  r1,  r2
         ldr             r7,  [r1, #4]
 1:
         subs            r3,  r3,  #2
         uhadd8          r8,  r4,  r6
-        ldr             r4,  [r1, r2]!
+        ldr_pre         r4,  r1,  r2
         uhadd8          r9,  r5,  r7
         ldr             r5,  [r1, #4]
         uhadd8          r12, r4,  r6
-        ldr             r6,  [r1, r2]!
+        ldr_pre         r6,  r1,  r2
         uhadd8          r14, r5,  r7
         ldr             r7,  [r1, #4]
         stm             r0,  {r8,r9}
@@ -220,44 +220,44 @@ function ff_avg_pixels8_armv6, export=1
         orr             lr,  lr,  lr,  lsl #16
         ldrd            r4,  r5,  [r0]
         ldr             r10, [r1, #4]
-        ldr             r9,  [r1], r2
+        ldr_post        r9,  r1,  r2
         subs            r3,  r3,  #2
 1:
         pld             [r1, r2]
         eor             r8,  r4,  r9
         uhadd8          r4,  r4,  r9
         eor             r12, r5,  r10
-        ldrd            r6,  r7,  [r0, r2]
+        ldrd_reg        r6,  r7,  r0,  r2
         uhadd8          r5,  r5,  r10
         and             r8,  r8,  lr
         ldr             r10, [r1, #4]
         and             r12, r12, lr
         uadd8           r4,  r4,  r8
-        ldr             r9,  [r1], r2
+        ldr_post        r9,  r1,  r2
         eor             r8,  r6,  r9
         uadd8           r5,  r5,  r12
         pld             [r1, r2,  lsl #1]
         eor             r12, r7,  r10
         uhadd8          r6,  r6,  r9
-        strd            r4,  r5,  [r0], r2
+        strd_post       r4,  r5,  r0,  r2
         uhadd8          r7,  r7,  r10
         beq             2f
         and             r8,  r8,  lr
-        ldrd            r4,  r5,  [r0, r2]
+        ldrd_reg        r4,  r5,  r0,  r2
         uadd8           r6,  r6,  r8
         ldr             r10, [r1, #4]
         and             r12, r12, lr
         subs            r3,  r3,  #2
         uadd8           r7,  r7,  r12
-        ldr             r9,  [r1], r2
-        strd            r6,  r7,  [r0], r2
+        ldr_post        r9,  r1,  r2
+        strd_post       r6,  r7,  r0,  r2
         b               1b
 2:
         and             r8,  r8,  lr
         and             r12, r12, lr
         uadd8           r6,  r6,  r8
         uadd8           r7,  r7,  r12
-        strd            r6,  r7,  [r0], r2
+        strd_post       r6,  r7,  r0,  r2
 
         pop             {r4-r10, pc}
 endfunc
@@ -284,7 +284,7 @@ function ff_add_pixels_clamped_armv6, export=1
         orr             r6,  r8,  r5,  lsl #8
         orr             r7,  r4,  lr,  lsl #8
         subs            r3,  r3,  #1
-        strd            r6,  r7,  [r1],  r2
+        strd_post       r6,  r7,  r1,  r2
         bgt             1b
         pop             {r4-r8,pc}
 endfunc
@@ -294,7 +294,7 @@ function ff_get_pixels_armv6, export=1
         push            {r4-r8, lr}
         mov             lr,  #8
 1:
-        ldrd            r4,  r5,  [r1],  r2
+        ldrd_post       r4,  r5,  r1,  r2
         subs            lr,  lr,  #1
         uxtb16          r6,  r4
         uxtb16          r4,  r4,  ror #8
@@ -317,8 +317,8 @@ function ff_diff_pixels_armv6, export=1
         push            {r4-r9, lr}
         mov             lr,  #8
 1:
-        ldrd            r4,  r5,  [r1],  r3
-        ldrd            r6,  r7,  [r2],  r3
+        ldrd_post       r4,  r5,  r1,  r3
+        ldrd_post       r6,  r7,  r2,  r3
         uxtb16          r8,  r4
         uxtb16          r4,  r4,  ror #8
         uxtb16          r9,  r6
@@ -492,19 +492,19 @@ function ff_pix_abs8_armv6, export=1
         push            {r4-r9, lr}
         mov             r0,  #0
         mov             lr,  #0
-        ldrd            r4,  r5,  [r1], r3
+        ldrd_post       r4,  r5,  r1,  r3
 1:
         subs            r12, r12, #2
         ldr             r7,  [r2, #4]
-        ldr             r6,  [r2], r3
-        ldrd            r8,  r9,  [r1], r3
+        ldr_post        r6,  r2,  r3
+        ldrd_post       r8,  r9,  r1,  r3
         usada8          r0,  r4,  r6,  r0
         pld             [r2, r3]
         usada8          lr,  r5,  r7,  lr
         ldr             r7,  [r2, #4]
-        ldr             r6,  [r2], r3
+        ldr_post        r6,  r2,  r3
         beq             2f
-        ldrd            r4,  r5,  [r1], r3
+        ldrd_post       r4,  r5,  r1,  r3
         usada8          r0,  r8,  r6,  r0
         pld             [r2, r3]
         usada8          lr,  r9,  r7,  lr
@@ -613,7 +613,7 @@ function ff_pix_sum_armv6, export=1
         ldr             r7,  [r0, #12]
         usada8          r2,  r6,  lr,  r2
         beq             2f
-        ldr             r4,  [r0, r1]!
+        ldr_pre         r4,  r0,  r1
         usada8          r3,  r7,  lr,  r3
         bgt             1b
 2:
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 0dbf5ca48a..2147658af6 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -531,6 +531,7 @@ function ff_vorbis_inverse_coupling_neon, export=1
 
 2:      vst1.32         {d2-d3},  [r3, :128]!
         vst1.32         {d0-d1},  [r12,:128]!
+        it              lt
         bxlt            lr
 
 3:      vld1.32         {d2-d3},  [r1,:128]
@@ -575,6 +576,7 @@ NOVFP   vdup.32         q8,  r2
 2:      vst1.32         {q2},[r0,:128]!
         vst1.32         {q3},[r0,:128]!
         ands            len, len, #15
+        it              eq
         bxeq            lr
 3:      vld1.32         {q0},[r1,:128]!
         vmul.f32        q0,  q0,  q8
@@ -638,6 +640,7 @@ NOVFP   ldr             r3,  [sp]
 2:      vst1.32         {q8},[r0,:128]!
         vst1.32         {q9},[r0,:128]!
         ands            r3,  r3,  #7
+        it              eq
         popeq           {pc}
 3:      vld1.32         {q0},[r1,:128]!
         ldr             r12, [r2], #4
diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S
index 497c02be92..108208174d 100644
--- a/libavcodec/arm/dsputil_vfp.S
+++ b/libavcodec/arm/dsputil_vfp.S
@@ -55,18 +55,23 @@ function ff_vector_fmul_vfp, export=1
 1:
         subs            r3,  r3,  #16
         vmul.f32        s12, s4,  s12
+        itttt           ge
         vldmiage        r1!, {s16-s19}
         vldmiage        r2!, {s24-s27}
         vldmiage        r1!, {s20-s23}
         vldmiage        r2!, {s28-s31}
+        it              ge
         vmulge.f32      s24, s16, s24
         vstmia          r0!, {s8-s11}
         vstmia          r0!, {s12-s15}
+        it              ge
         vmulge.f32      s28, s20, s28
+        itttt           gt
         vldmiagt        r1!, {s0-s3}
         vldmiagt        r2!, {s8-s11}
         vldmiagt        r1!, {s4-s7}
         vldmiagt        r2!, {s12-s15}
+        ittt            ge
         vmulge.f32      s8,  s0,  s8
         vstmiage        r0!, {s24-s27}
         vstmiage        r0!, {s28-s31}
@@ -97,33 +102,49 @@ function ff_vector_fmul_reverse_vfp, export=1
         vmul.f32        s11, s0,  s11
 1:
         subs            r3,  r3,  #16
+        it              ge
         vldmdbge        r2!, {s16-s19}
         vmul.f32        s12, s7,  s12
+        it              ge
         vldmiage        r1!, {s24-s27}
         vmul.f32        s13, s6,  s13
+        it              ge
         vldmdbge        r2!, {s20-s23}
         vmul.f32        s14, s5,  s14
+        it              ge
         vldmiage        r1!, {s28-s31}
         vmul.f32        s15, s4,  s15
+        it              ge
         vmulge.f32      s24, s19, s24
+        it              gt
         vldmdbgt        r2!, {s0-s3}
+        it              ge
         vmulge.f32      s25, s18, s25
         vstmia          r0!, {s8-s13}
+        it              ge
         vmulge.f32      s26, s17, s26
+        it              gt
         vldmiagt        r1!, {s8-s11}
+        itt             ge
         vmulge.f32      s27, s16, s27
         vmulge.f32      s28, s23, s28
+        it              gt
         vldmdbgt        r2!, {s4-s7}
+        it              ge
         vmulge.f32      s29, s22, s29
         vstmia          r0!, {s14-s15}
+        ittt            ge
         vmulge.f32      s30, s21, s30
         vmulge.f32      s31, s20, s31
         vmulge.f32      s8,  s3,  s8
+        it              gt
         vldmiagt        r1!, {s12-s15}
+        itttt           ge
         vmulge.f32      s9,  s2,  s9
         vmulge.f32      s10, s1,  s10
         vstmiage        r0!, {s24-s27}
         vmulge.f32      s11, s0,  s11
+        it              ge
         vstmiage        r0!, {s28-s31}
         bgt             1b
 
diff --git a/libavcodec/arm/fmtconvert_neon.S b/libavcodec/arm/fmtconvert_neon.S
index 359e57e40b..d1ad32ed27 100644
--- a/libavcodec/arm/fmtconvert_neon.S
+++ b/libavcodec/arm/fmtconvert_neon.S
@@ -71,6 +71,7 @@ endfunc
 
 function ff_float_to_int16_interleave_neon, export=1
         cmp             r3, #2
+        itt             lt
         ldrlt           r1, [r1]
         blt             ff_float_to_int16_neon
         bne             4f
@@ -196,6 +197,7 @@ function ff_float_to_int16_interleave_neon, export=1
         vst1.64         {d3},     [r8], ip
         vst1.64         {d7},     [r8], ip
         subs            r3,  r3,  #4
+        it              eq
         popeq           {r4-r8,pc}
         cmp             r3,  #4
         add             r0,  r0,  #8
@@ -305,6 +307,7 @@ function ff_float_to_int16_interleave_neon, export=1
         vst1.32         {d23[1]}, [r8], ip
 8:      subs            r3,  r3,  #2
         add             r0,  r0,  #4
+        it              eq
         popeq           {r4-r8,pc}
 
         @ 1 channel
@@ -354,6 +357,7 @@ function ff_float_to_int16_interleave_neon, export=1
         vst1.16         {d2[3]},  [r5,:16], ip
         vst1.16         {d3[1]},  [r5,:16], ip
         vst1.16         {d3[3]},  [r5,:16], ip
+        it              eq
         popeq           {r4-r8,pc}
         vld1.64         {d0-d1},  [r4,:128]!
         vcvt.s32.f32    q0,  q0,  #16
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
index da2ef8c158..7e2eb83620 100644
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -46,6 +46,7 @@ function ff_float_to_int16_vfp, export=1
         vmov            r5,  r6,  s2, s3
         vmov            r7,  r8,  s4, s5
         vmov            ip,  lr,  s6, s7
+        it              gt
         vldmiagt        r1!, {s16-s23}
         ssat            r4,  #16, r4
         ssat            r3,  #16, r3
@@ -53,10 +54,12 @@ function ff_float_to_int16_vfp, export=1
         ssat            r5,  #16, r5
         pkhbt           r3,  r3,  r4, lsl #16
         pkhbt           r4,  r5,  r6, lsl #16
+        itttt           gt
         vcvtgt.s32.f32  s0,  s16
         vcvtgt.s32.f32  s1,  s17
         vcvtgt.s32.f32  s2,  s18
         vcvtgt.s32.f32  s3,  s19
+        itttt           gt
         vcvtgt.s32.f32  s4,  s20
         vcvtgt.s32.f32  s5,  s21
         vcvtgt.s32.f32  s6,  s22
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index bd15ced736..338de6f643 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -71,7 +71,9 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
         pld             [r1]
         pld             [r1, r2]
 
-        muls            r7,  r4,  r5
+A       muls            r7,  r4,  r5
+T       mul             r7,  r4,  r5
+T       cmp             r7,  #0
         rsb             r6,  r7,  r5,  lsl #3
         rsb             ip,  r7,  r4,  lsl #3
         sub             r4,  r7,  r4,  lsl #3
@@ -197,7 +199,9 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
         pld             [r1]
         pld             [r1, r2]
 
-        muls            r7,  r4,  r5
+A       muls            r7,  r4,  r5
+T       mul             r7,  r4,  r5
+T       cmp             r7,  #0
         rsb             r6,  r7,  r5,  lsl #3
         rsb             ip,  r7,  r4,  lsl #3
         sub             r4,  r7,  r4,  lsl #3
@@ -368,10 +372,10 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
         pop             {r4-r6, pc}
 2:
 .ifc \type,put
-        ldrh            r5,  [r1], r2
-        strh            r5,  [r0], r2
-        ldrh            r6,  [r1], r2
-        strh            r6,  [r0], r2
+        ldrh_post       r5,  r1,  r2
+        strh_post       r5,  r0,  r2
+        ldrh_post       r6,  r1,  r2
+        strh_post       r6,  r0,  r2
 .else
         vld1.16         {d16[0]}, [r1], r2
         vld1.16         {d16[1]}, [r1], r2
@@ -404,28 +408,17 @@ endfunc
         ldr             ip,  [sp]
         tst             r2,  r2
         ldr             ip,  [ip]
+        it              ne
         tstne           r3,  r3
         vmov.32         d24[0], ip
         and             ip,  ip,  ip, lsl #16
+        it              eq
         bxeq            lr
         ands            ip,  ip,  ip, lsl #8
+        it              lt
         bxlt            lr
         .endm
 
-        .macro align_push_regs
-        and             ip,  sp,  #15
-        add             ip,  ip,  #32
-        sub             sp,  sp,  ip
-        vst1.64         {d12-d15}, [sp,:128]
-        sub             sp,  sp,  #32
-        vst1.64         {d8-d11},  [sp,:128]
-        .endm
-
-        .macro align_pop_regs
-        vld1.64         {d8-d11},  [sp,:128]!
-        vld1.64         {d12-d15}, [sp,:128], ip
-        .endm
-
         .macro h264_loop_filter_luma
         vdup.8          q11, r2         @ alpha
         vmovl.u8        q12, d24
@@ -506,7 +499,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
         vld1.64         {d18,d19}, [r0,:128], r1
         vld1.64         {d16,d17}, [r0,:128], r1
 
-        align_push_regs
+        vpush           {d8-d15}
 
         h264_loop_filter_luma
 
@@ -516,7 +509,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
         vst1.64         {d0, d1},  [r0,:128], r1
         vst1.64         {d10,d11}, [r0,:128]
 
-        align_pop_regs
+        vpop            {d8-d15}
         bx              lr
 endfunc
 
@@ -543,7 +536,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1
 
         transpose_8x8   q3, q10, q9, q8, q0, q1, q2, q13
 
-        align_push_regs
+        vpush           {d8-d15}
 
         h264_loop_filter_luma
 
@@ -568,7 +561,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1
         vst1.32         {d1[1]},  [r0], r1
         vst1.32         {d11[1]}, [r0], r1
 
-        align_pop_regs
+        vpop            {d8-d15}
         bx              lr
 endfunc
 
@@ -1116,6 +1109,7 @@ function \type\()_h264_qpel8_hv_lowpass_neon
         vrhadd.u8       d11, d11, d7
         sub             r0,  r0,  r2,  lsl #3
 .endif
+
         vst1.64         {d12},     [r0,:64], r2
         vst1.64         {d13},     [r0,:64], r2
         vst1.64         {d14},     [r0,:64], r2
@@ -1263,7 +1257,9 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1
 \type\()_h264_qpel8_mc11:
         lowpass_const   r3
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r0,  r11, #15
+T       mov             sp,  r0
         sub             sp,  sp,  #64
         mov             r0,  sp
         sub             r1,  r1,  #2
@@ -1271,14 +1267,14 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1
         mov             ip,  #8
         vpush           {d8-d15}
         bl              put_h264_qpel8_h_lowpass_neon
-        ldrd            r0,  [r11]
+        ldrd            r0,  [r11], #8
         mov             r3,  r2
         add             ip,  sp,  #64
         sub             r1,  r1,  r2, lsl #1
         mov             r2,  #8
         bl              \type\()_h264_qpel8_v_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  r11, #8
+        mov             sp,  r11
         pop             {r11, pc}
 endfunc
 
@@ -1287,7 +1283,9 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1
 \type\()_h264_qpel8_mc21:
         lowpass_const   r3
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r0,  r11, #15
+T       mov             sp,  r0
         sub             sp,  sp,  #(8*8+16*12)
         sub             r1,  r1,  #2
         mov             r3,  #8
@@ -1296,14 +1294,14 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1
         vpush           {d8-d15}
         bl              put_h264_qpel8_h_lowpass_neon
         mov             r4,  r0
-        ldrd            r0,  [r11]
+        ldrd            r0,  [r11], #8
         sub             r1,  r1,  r2, lsl #1
         sub             r1,  r1,  #2
         mov             r3,  r2
         sub             r2,  r4,  #64
         bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  r11,  #8
+        mov             sp,  r11
         pop             {r4, r10, r11, pc}
 endfunc
 
@@ -1330,7 +1328,9 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1
 \type\()_h264_qpel8_mc12:
         lowpass_const   r3
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r0,  r11, #15
+T       mov             sp,  r0
         sub             sp,  sp,  #(8*8+16*12)
         sub             r1,  r1,  r2, lsl #1
         mov             r3,  r2
@@ -1339,20 +1339,22 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1
         vpush           {d8-d15}
         bl              put_h264_qpel8_v_lowpass_neon
         mov             r4,  r0
-        ldrd            r0,  [r11]
+        ldrd            r0,  [r11], #8
         sub             r1,  r1,  r3, lsl #1
         sub             r1,  r1,  #2
         sub             r2,  r4,  #64
         bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  r11,  #8
+        mov             sp,  r11
         pop             {r4, r10, r11, pc}
 endfunc
 
 function ff_\type\()_h264_qpel8_mc22_neon, export=1
         push            {r4, r10, r11, lr}
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r4,  r11, #15
+T       mov             sp,  r4
         sub             r1,  r1,  r2, lsl #1
         sub             r1,  r1,  #2
         mov             r3,  r2
@@ -1441,21 +1443,23 @@ function ff_\type\()_h264_qpel16_mc11_neon, export=1
 \type\()_h264_qpel16_mc11:
         lowpass_const   r3
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r0,  r11, #15
+T       mov             sp,  r0
         sub             sp,  sp,  #256
         mov             r0,  sp
         sub             r1,  r1,  #2
         mov             r3,  #16
         vpush           {d8-d15}
         bl              put_h264_qpel16_h_lowpass_neon
-        ldrd            r0,  [r11]
+        ldrd            r0,  [r11], #8
         mov             r3,  r2
         add             ip,  sp,  #64
         sub             r1,  r1,  r2, lsl #1
         mov             r2,  #16
         bl              \type\()_h264_qpel16_v_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  r11, #8
+        mov             sp,  r11
         pop             {r4, r11, pc}
 endfunc
 
@@ -1464,20 +1468,22 @@ function ff_\type\()_h264_qpel16_mc21_neon, export=1
 \type\()_h264_qpel16_mc21:
         lowpass_const   r3
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r0,  r11, #15
+T       mov             sp,  r0
         sub             sp,  sp,  #(16*16+16*12)
         sub             r1,  r1,  #2
         mov             r0,  sp
         vpush           {d8-d15}
         bl              put_h264_qpel16_h_lowpass_neon_packed
         mov             r4,  r0
-        ldrd            r0,  [r11]
+        ldrd            r0,  [r11], #8
         sub             r1,  r1,  r2, lsl #1
         sub             r1,  r1,  #2
         mov             r3,  r2
         bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  r11,  #8
+        mov             sp,  r11
         pop             {r4-r5, r9-r11, pc}
 endfunc
 
@@ -1504,7 +1510,9 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1
 \type\()_h264_qpel16_mc12:
         lowpass_const   r3
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r0,  r11, #15
+T       mov             sp,  r0
         sub             sp,  sp,  #(16*16+16*12)
         sub             r1,  r1,  r2, lsl #1
         mov             r0,  sp
@@ -1512,13 +1520,13 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1
         vpush           {d8-d15}
         bl              put_h264_qpel16_v_lowpass_neon_packed
         mov             r4,  r0
-        ldrd            r0,  [r11]
+        ldrd            r0,  [r11], #8
         sub             r1,  r1,  r3, lsl #1
         sub             r1,  r1,  #2
         mov             r2,  r3
         bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  r11,  #8
+        mov             sp,  r11
         pop             {r4-r5, r9-r11, pc}
 endfunc
 
@@ -1526,7 +1534,9 @@ function ff_\type\()_h264_qpel16_mc22_neon, export=1
         push            {r4, r9-r11, lr}
         lowpass_const   r3
         mov             r11, sp
-        bic             sp,  sp,  #15
+A       bic             sp,  sp,  #15
+T       bic             r4,  r11, #15
+T       mov             sp,  r4
         sub             r1,  r1,  r2, lsl #1
         sub             r1,  r1,  #2
         mov             r3,  r2
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index afd3718518..6ea56587b8 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -106,10 +106,12 @@ function ff_h264_idct_add16_neon, export=1
         blt             2f
         ldrsh           lr,  [r1]
         add             r0,  r0,  r4
+        it              ne
         movne           lr,  #0
         cmp             lr,  #0
-        adrne           lr,  ff_h264_idct_dc_add_neon
-        adreq           lr,  ff_h264_idct_add_neon
+        ite             ne
+        adrne           lr,  ff_h264_idct_dc_add_neon + CONFIG_THUMB
+        adreq           lr,  ff_h264_idct_add_neon    + CONFIG_THUMB
         blx             lr
 2:      subs            ip,  ip,  #1
         add             r1,  r1,  #32
@@ -132,8 +134,9 @@ function ff_h264_idct_add16intra_neon, export=1
         add             r0,  r0,  r4
         cmp             r8,  #0
         ldrsh           r8,  [r1]
-        adrne           lr,  ff_h264_idct_add_neon
-        adreq           lr,  ff_h264_idct_dc_add_neon
+        iteet           ne
+        adrne           lr,  ff_h264_idct_add_neon    + CONFIG_THUMB
+        adreq           lr,  ff_h264_idct_dc_add_neon + CONFIG_THUMB
         cmpeq           r8,  #0
         blxne           lr
         subs            ip,  ip,  #1
@@ -159,12 +162,14 @@ function ff_h264_idct_add8_neon, export=1
         add             r1,  r3,  r12, lsl #5
         cmp             r8,  #0
         ldrsh           r8,  [r1]
-        adrne           lr,  ff_h264_idct_add_neon
-        adreq           lr,  ff_h264_idct_dc_add_neon
+        iteet           ne
+        adrne           lr,  ff_h264_idct_add_neon    + CONFIG_THUMB
+        adreq           lr,  ff_h264_idct_dc_add_neon + CONFIG_THUMB
         cmpeq           r8,  #0
         blxne           lr
         add             r12, r12, #1
         cmp             r12, #4
+        itt             eq
         moveq           r12, #16
         moveq           r4,  r9
         cmp             r12, #20
@@ -365,10 +370,12 @@ function ff_h264_idct8_add4_neon, export=1
         blt             2f
         ldrsh           lr,  [r1]
         add             r0,  r0,  r4
+        it              ne
         movne           lr,  #0
         cmp             lr,  #0
-        adrne           lr,  ff_h264_idct8_dc_add_neon
-        adreq           lr,  ff_h264_idct8_add_neon
+        ite             ne
+        adrne           lr,  ff_h264_idct8_dc_add_neon + CONFIG_THUMB
+        adreq           lr,  ff_h264_idct8_add_neon    + CONFIG_THUMB
         blx             lr
 2:      subs            r12, r12, #4
         add             r1,  r1,  #128
diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 299a973cb6..d67714c496 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -64,11 +64,14 @@ static inline av_const int mid_pred(int a, int b, int c)
     __asm__ (
         "mov   %0, %2  \n\t"
         "cmp   %1, %2  \n\t"
+        "itt   gt      \n\t"
         "movgt %0, %1  \n\t"
         "movgt %1, %2  \n\t"
         "cmp   %1, %3  \n\t"
+        "it    le      \n\t"
         "movle %1, %3  \n\t"
         "cmp   %0, %1  \n\t"
+        "it    gt      \n\t"
         "movgt %0, %1  \n\t"
         : "=&r"(m), "+r"(a)
         : "r"(b), "r"(c)
diff --git a/libavcodec/arm/mdct_neon.S b/libavcodec/arm/mdct_neon.S
index fcf802275f..2def704497 100644
--- a/libavcodec/arm/mdct_neon.S
+++ b/libavcodec/arm/mdct_neon.S
@@ -191,7 +191,9 @@ function ff_mdct_calc_neon, export=1
         vadd.f32        d17, d17, d3            @ in2u+in1d     -I
 1:
         vmul.f32        d7,  d0,  d21           @  I*s
-        ldr             r10, [r3, lr, lsr #1]
+A       ldr             r10, [r3, lr, lsr #1]
+T       lsr             r10, lr,  #1
+T       ldr             r10, [r3, r10]
         vmul.f32        d6,  d1,  d20           @ -R*c
         ldr             r6,  [r3, #4]!
         vmul.f32        d4,  d1,  d21           @ -R*s
diff --git a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
index 9ec731480b..b517b973e7 100644
--- a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
+++ b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
@@ -75,7 +75,7 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1
         sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr
         sum8            r8,  r9,  r1,  r2,  r10, r11, r12, lr, rsb, 32
         round           r10, r8,  r9
-        strh            r10, [r3], r4
+        strh_post       r10, r3,  r4
 
         mov             lr,  #15
 1:
@@ -127,10 +127,10 @@ function ff_mpadsp_apply_window_fixed_armv6, export=1
         round           r10, r8,  r9
         adds            r8,  r8,  r4
         adc             r9,  r9,  r7
-        strh            r10, [r3], r12
+        strh_post       r10, r3,  r12
         round           r11, r8,  r9
         subs            lr,  lr,  #1
-        strh            r11, [r5], -r12
+        strh_dpost      r11, r5, r12
         bgt             1b
 
         sum8            r8,  r9,  r1,  r0,  r10, r11, r12, lr, rsb, 33
diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
index 82095ab15d..3db9c734e9 100644
--- a/libavcodec/arm/mpegvideo_armv5te_s.S
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -38,15 +38,21 @@
 
 .macro  dequant_t       dst, src, mul, add, tmp
         rsbs            \tmp, ip, \src, asr #16
+        it              gt      /* IT block covering the conditional addgt below (Thumb-2 encoding) */
         addgt           \tmp, \add, #0
+        it              lt      /* separate IT block: lt is not the inverse of gt, so "ite" cannot merge them */
         rsblt           \tmp, \add, #0
+        it              ne      /* IT block covering smlatbne; flags are still those set by rsbs above */
         smlatbne        \dst, \src, \mul, \tmp
 .endm
 
 .macro  dequant_b       dst, src, mul, add, tmp
         rsbs            \tmp, ip, \src, lsl #16
+        it              gt      /* IT block covering the conditional addgt below (Thumb-2 encoding) */
         addgt           \tmp, \add, #0
+        it              lt      /* separate IT block: lt is not the inverse of gt, so "ite" cannot merge them */
         rsblt           \tmp, \add, #0
+        it              ne      /* IT block covering smlabbne; flags are still those set by rsbs above */
         smlabbne        \dst, \src, \mul, \tmp
 .endm
 
@@ -80,21 +86,27 @@ function ff_dct_unquantize_h263_armv5te, export=1
         strh            lr, [r0], #2
 
         subs            r3, r3, #8
+        it              gt
         ldrdgt          r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
         bgt             1b
 
         adds            r3, r3, #2
+        it              le
         pople           {r4-r9,pc}
 2:
         ldrsh           r9, [r0, #0]
         ldrsh           lr, [r0, #2]
         mov             r8, r2
         cmp             r9, #0
+        it              lt
         rsblt           r8, r2, #0
+        it              ne
         smlabbne        r9, r9, r1, r8
         mov             r8, r2
         cmp             lr, #0
+        it              lt
         rsblt           r8, r2, #0
+        it              ne
         smlabbne        lr, lr, r1, r8
         strh            r9, [r0], #2
         strh            lr, [r0], #2
diff --git a/libavcodec/arm/mpegvideo_neon.S b/libavcodec/arm/mpegvideo_neon.S
index b695fb7c22..849047e13c 100644
--- a/libavcodec/arm/mpegvideo_neon.S
+++ b/libavcodec/arm/mpegvideo_neon.S
@@ -57,6 +57,7 @@ function ff_dct_unquantize_h263_neon, export=1
         subs            r3,  r3,  #16
         vst1.16         {q0},     [r1,:128]!
         vst1.16         {q8},     [r1,:128]!
+        it              le
         bxle            lr
         cmp             r3,  #8
         bgt             1b
@@ -78,6 +79,7 @@ function ff_dct_unquantize_h263_intra_neon, export=1
         ldr             r6,  [r0, #AC_PRED]
         add             lr,  r0,  #INTER_SCANTAB_RASTER_END
         cmp             r6,  #0
+        it              ne
         movne           r12, #63
         bne             1f
         ldr             r12, [r12, r2, lsl #2]
@@ -86,9 +88,11 @@ function ff_dct_unquantize_h263_intra_neon, export=1
         ldrsh           r4,  [r1]
         cmp             r5,  #0
         mov             r5,  r1
+        it              ne
         movne           r2,  #0
         bne             2f
         cmp             r2,  #4
+        it              ge
         addge           r0,  r0,  #4
         sub             r2,  r3,  #1
         ldr             r6,  [r0, #Y_DC_SCALE]
diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
index 4f8a1032cc..19886e6d0b 100644
--- a/libavcodec/arm/rdft_neon.S
+++ b/libavcodec/arm/rdft_neon.S
@@ -137,6 +137,7 @@ function ff_rdft_calc_neon, export=1
         vst1.32         {d22},    [r5,:64]
 
         cmp             r6,  #0
+        it              eq
         popeq           {r4-r8,pc}
 
         vmul.f32        d22, d22, d18
diff --git a/libavcodec/arm/simple_idct_arm.S b/libavcodec/arm/simple_idct_arm.S
index ecb83d23ad..990dde6ff7 100644
--- a/libavcodec/arm/simple_idct_arm.S
+++ b/libavcodec/arm/simple_idct_arm.S
@@ -121,11 +121,13 @@ __b_evaluation:
         ldr r11, [r12, #offW7]   @ R11=W7
         mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
         mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
-                teq r2, #0               @ if null avoid muls
-                mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        teq r2, #0               @ if null avoid muls
+        itttt ne
+        mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
         rsbne r2, r2, #0         @ R2=-ROWr16[3]
         mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
         mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        it    ne
         mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
 
         @@ at this point, R0=b0,  R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
@@ -148,19 +150,23 @@ __b_evaluation:
         @@ MAC16(b3, -W1, row[7]);
         @@ MAC16(b1, -W5, row[7]);
         mov r3, r3, asr #16      @ R3=ROWr16[5]
-                teq r3, #0               @ if null avoid muls
+        teq r3, #0               @ if null avoid muls
+        it    ne
         mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5]=b0
         mov r4, r4, asr #16      @ R4=ROWr16[7]
+        itttt ne
         mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5]=b2
         mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5]=b3
         rsbne r3, r3, #0         @ R3=-ROWr16[5]
         mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5]=b1
         @@ R3 is free now
-                teq r4, #0               @ if null avoid muls
+        teq r4, #0               @ if null avoid muls
+        itttt ne
         mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7]=b0
         mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7]=b2
         rsbne r4, r4, #0         @ R4=-ROWr16[7]
         mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7]=b3
+        it    ne
         mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7]=b1
         @@ R4 is free now
 __end_b_evaluation:
@@ -204,16 +210,19 @@ __a_evaluation:
         @@ a2 -= W4*row[4]
         @@ a3 += W4*row[4]
         ldrsh r11, [r14, #8]     @ R11=ROWr16[4]
-                teq r11, #0              @ if null avoid muls
+        teq r11, #0              @ if null avoid muls
+        it    ne
         mulne r11, r9, r11       @ R11=W4*ROWr16[4]
         @@ R9 is free now
         ldrsh r9, [r14, #12]     @ R9=ROWr16[6]
+        itttt ne
         addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
         subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
         subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
         addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
         @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
-                teq r9, #0               @ if null avoid muls
+        teq r9, #0               @ if null avoid muls
+        itttt ne
         mulne r11, r10, r9       @ R11=W6*ROWr16[6]
         addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
         mulne r10, r8, r9        @ R10=W2*ROWr16[6]
@@ -222,6 +231,7 @@ __a_evaluation:
         @@ a1 -= W2*row[6];
         @@ a2 += W2*row[6];
         subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
+        itt   ne
         subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
         addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
 
@@ -323,10 +333,12 @@ __b_evaluation2:
         ldrsh r2, [r14, #48]
         mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
         teq r2, #0               @ if 0, then avoid muls
+        itttt ne
         mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
         rsbne r2, r2, #0         @ R2=-ROWr16[3]
         mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
         mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        it    ne
         mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
 
         @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
@@ -342,18 +354,22 @@ __b_evaluation2:
         @@ MAC16(b1, -W5, col[7x8]);
         ldrsh r3, [r14, #80]     @ R3=COLr16[5x8]
         teq r3, #0               @ if 0 then avoid muls
+        itttt ne
         mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5x8]=b0
         mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5x8]=b2
         mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5x8]=b3
         rsbne r3, r3, #0         @ R3=-ROWr16[5x8]
         ldrsh r4, [r14, #112]    @ R4=COLr16[7x8]
+        it    ne
         mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5x8]=b1
         @@ R3 is free now
         teq r4, #0               @ if 0 then avoid muls
+        itttt ne
         mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7x8]=b0
         mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7x8]=b2
         rsbne r4, r4, #0         @ R4=-ROWr16[7x8]
         mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7x8]=b3
+        it    ne
         mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7x8]=b1
         @@ R4 is free now
 __end_b_evaluation2:
@@ -390,15 +406,18 @@ __a_evaluation2:
         @@ a3 += W4*row[4]
         ldrsh r11, [r14, #64]    @ R11=ROWr16[4]
         teq r11, #0              @ if null avoid muls
+        itttt ne                 @ covers mulne, addne and the two subne; the comment line in between emits nothing
         mulne r11, r9, r11       @ R11=W4*ROWr16[4]
         @@ R9 is free now
         addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
         subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
         subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
         ldrsh r9, [r14, #96]     @ R9=ROWr16[6]
+        it    ne                 @ separate IT for the a3 update that follows the unconditional ldrsh
         addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
         @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
         teq r9, #0               @ if null avoid muls
+        itttt ne
         mulne r11, r10, r9       @ R11=W6*ROWr16[6]
         addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
         mulne r10, r8, r9        @ R10=W2*ROWr16[6]
@@ -407,6 +426,7 @@ __a_evaluation2:
         @@ a1 -= W2*row[6];
         @@ a2 += W2*row[6];
         subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
+        itt   ne
         subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
         addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
 __end_a_evaluation2:
diff --git a/libavcodec/arm/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S
index 3c4b5c06d1..71727ceccc 100644
--- a/libavcodec/arm/simple_idct_armv5te.S
+++ b/libavcodec/arm/simple_idct_armv5te.S
@@ -49,6 +49,7 @@ function idct_row_armv5te
         ldrd   v1, [a1, #8]
         ldrd   a3, [a1]              /* a3 = row[1:0], a4 = row[3:2] */
         orrs   v1, v1, v2
+        itt    eq
         cmpeq  v1, a4
         cmpeq  v1, a3, lsr #16
         beq    row_dc_only
@@ -269,6 +270,7 @@ function idct_col_armv5te
         ldmfd  sp!, {a3, a4}
         adds   a2, a3, v1
         mov    a2, a2, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         add    ip, a4, v2
         mov    ip, ip, asr #20
@@ -276,6 +278,7 @@ function idct_col_armv5te
         str    a2, [a1]
         subs   a3, a3, v1
         mov    a2, a3, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         sub    a4, a4, v2
         mov    a4, a4, asr #20
@@ -285,6 +288,7 @@ function idct_col_armv5te
 
         subs   a2, a3, v3
         mov    a2, a2, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         sub    ip, a4, v4
         mov    ip, ip, asr #20
@@ -292,6 +296,7 @@ function idct_col_armv5te
         str    a2, [a1, #(16*1)]
         adds   a3, a3, v3
         mov    a2, a3, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         add    a4, a4, v4
         mov    a4, a4, asr #20
@@ -301,6 +306,7 @@ function idct_col_armv5te
 
         adds   a2, a3, v5
         mov    a2, a2, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         add    ip, a4, v6
         mov    ip, ip, asr #20
@@ -308,6 +314,7 @@ function idct_col_armv5te
         str    a2, [a1, #(16*2)]
         subs   a3, a3, v5
         mov    a2, a3, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         sub    a4, a4, v6
         mov    a4, a4, asr #20
@@ -317,6 +324,7 @@ function idct_col_armv5te
 
         adds   a2, a3, v7
         mov    a2, a2, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         add    ip, a4, fp
         mov    ip, ip, asr #20
@@ -324,6 +332,7 @@ function idct_col_armv5te
         str    a2, [a1, #(16*3)]
         subs   a3, a3, v7
         mov    a2, a3, lsr #20
+        it     mi
         orrmi  a2, a2, #0xf000
         sub    a4, a4, fp
         mov    a4, a4, asr #20
@@ -335,15 +344,19 @@ endfunc
 
 .macro  clip   dst, src:vararg
         movs   \dst, \src
+        it     mi                    /* negative result -> clamp to 0 */
         movmi  \dst, #0
         cmp    \dst, #255
+        it     gt                    /* above 255 -> clamp to 255 */
         movgt  \dst, #255
 .endm
 
 .macro  aclip  dst, src:vararg
         adds   \dst, \src
+        it     mi                    /* negative sum -> clamp to 0 */
         movmi  \dst, #0
         cmp    \dst, #255
+        it     gt                    /* above 255 -> clamp to 255 */
         movgt  \dst, #255
 .endm
 
@@ -370,35 +383,35 @@ function idct_col_put_armv5te
         orr    a2, a3, a4, lsl #8
         rsb    v2, lr, lr, lsl #3
         ldmfd  sp!, {a3, a4}
-        strh   a2, [v2, v1]!
+        strh_pre a2, v2, v1
 
         sub    a2, a3, v3
         clip   a2, a2, asr #20
         sub    ip, a4, v4
         clip   ip, ip, asr #20
         orr    a2, a2, ip, lsl #8
-        strh   a2, [v1, lr]!
+        strh_pre a2, v1, lr
         add    a3, a3, v3
         clip   a2, a3, asr #20
         add    a4, a4, v4
         clip   a4, a4, asr #20
         orr    a2, a2, a4, lsl #8
         ldmfd  sp!, {a3, a4}
-        strh   a2, [v2, -lr]!
+        strh_dpre a2, v2, lr
 
         add    a2, a3, v5
         clip   a2, a2, asr #20
         add    ip, a4, v6
         clip   ip, ip, asr #20
         orr    a2, a2, ip, lsl #8
-        strh   a2, [v1, lr]!
+        strh_pre a2, v1, lr
         sub    a3, a3, v5
         clip   a2, a3, asr #20
         sub    a4, a4, v6
         clip   a4, a4, asr #20
         orr    a2, a2, a4, lsl #8
         ldmfd  sp!, {a3, a4}
-        strh   a2, [v2, -lr]!
+        strh_dpre a2, v2, lr
 
         add    a2, a3, v7
         clip   a2, a2, asr #20
@@ -411,7 +424,7 @@ function idct_col_put_armv5te
         sub    a4, a4, fp
         clip   a4, a4, asr #20
         orr    a2, a2, a4, lsl #8
-        strh   a2, [v2, -lr]
+        strh_dpre a2, v2, lr         /* NOTE(review): the replaced strh had no writeback, unlike the earlier stores mapped to strh_dpre; confirm that updating v2 here is harmless (the function returns right after) */
 
         ldr    pc, [sp], #4
 endfunc
@@ -436,7 +449,7 @@ function idct_col_add_armv5te
         ldr    v1, [sp, #32]
         sub    a4, a4, v2
         rsb    v2, v1, v1, lsl #3
-        ldrh   ip, [v2, lr]!
+        ldrh_pre ip, v2, lr
         strh   a2, [lr]
         and    a2, ip, #255
         aclip  a3, a2, a3, asr #20
@@ -448,7 +461,7 @@ function idct_col_add_armv5te
         strh   a2, [v2]
 
         ldmfd  sp!, {a3, a4}
-        ldrh   ip, [lr, v1]!
+        ldrh_pre ip, lr, v1
         sub    a2, a3, v3
         add    a3, a3, v3
         and    v3, ip, #255
@@ -458,7 +471,7 @@ function idct_col_add_armv5te
         aclip  v3, v3, ip, lsr #8
         orr    a2, a2, v3, lsl #8
         add    a4, a4, v4
-        ldrh   ip, [v2, -v1]!
+        ldrh_dpre ip, v2, v1
         strh   a2, [lr]
         and    a2, ip, #255
         aclip  a3, a2, a3, asr #20
@@ -468,7 +481,7 @@ function idct_col_add_armv5te
         strh   a2, [v2]
 
         ldmfd  sp!, {a3, a4}
-        ldrh   ip, [lr, v1]!
+        ldrh_pre ip, lr, v1
         add    a2, a3, v5
         sub    a3, a3, v5
         and    v3, ip, #255
@@ -478,7 +491,7 @@ function idct_col_add_armv5te
         aclip  v3, v3, ip, lsr #8
         orr    a2, a2, v3, lsl #8
         sub    a4, a4, v6
-        ldrh   ip, [v2, -v1]!
+        ldrh_dpre ip, v2, v1
         strh   a2, [lr]
         and    a2, ip, #255
         aclip  a3, a2, a3, asr #20
@@ -488,7 +501,7 @@ function idct_col_add_armv5te
         strh   a2, [v2]
 
         ldmfd  sp!, {a3, a4}
-        ldrh   ip, [lr, v1]!
+        ldrh_pre ip, lr, v1
         add    a2, a3, v7
         sub    a3, a3, v7
         and    v3, ip, #255
@@ -498,7 +511,7 @@ function idct_col_add_armv5te
         aclip  v3, v3, ip, lsr #8
         orr    a2, a2, v3, lsl #8
         sub    a4, a4, fp
-        ldrh   ip, [v2, -v1]!
+        ldrh_dpre ip, v2, v1
         strh   a2, [lr]
         and    a2, ip, #255
         aclip  a3, a2, a3, asr #20
diff --git a/libavcodec/arm/simple_idct_armv6.S b/libavcodec/arm/simple_idct_armv6.S
index d61c1fd3ea..a176b3a7b4 100644
--- a/libavcodec/arm/simple_idct_armv6.S
+++ b/libavcodec/arm/simple_idct_armv6.S
@@ -200,6 +200,7 @@ function idct_row_armv6
         ldr    r3, [r0, #8]          /* r3 = row[3,1] */
         ldr    r2, [r0]              /* r2 = row[2,0] */
         orrs   lr, lr, ip
+        itt    eq
         cmpeq  lr, r3
         cmpeq  lr, r2, lsr #16
         beq    1f
@@ -282,14 +283,14 @@ function idct_col_put_armv6
         pop    {r1, r2}
         idct_finish_shift_sat COL_SHIFT
 
-        strb   r4, [r1], r2
-        strb   r5, [r1], r2
-        strb   r6, [r1], r2
-        strb   r7, [r1], r2
-        strb   r11,[r1], r2
-        strb   r10,[r1], r2
-        strb   r9, [r1], r2
-        strb   r8, [r1], r2
+        strb_post r4, r1, r2
+        strb_post r5, r1, r2
+        strb_post r6, r1, r2
+        strb_post r7, r1, r2
+        strb_post r11,r1, r2
+        strb_post r10,r1, r2
+        strb_post r9, r1, r2
+        strb_post r8, r1, r2
 
         sub    r1, r1, r2, lsl #3
 
@@ -318,16 +319,16 @@ function idct_col_add_armv6
         add    ip, r3, ip, asr #COL_SHIFT
         usat   ip, #8, ip
         add    r4, r7, r4, asr #COL_SHIFT
-        strb   ip, [r1], r2
+        strb_post ip, r1, r2
         ldrb   ip, [r1, r2]
         usat   r4, #8, r4
         ldrb   r11,[r1, r2, lsl #2]
         add    r5, ip, r5, asr #COL_SHIFT
         usat   r5, #8, r5
-        strb   r4, [r1], r2
+        strb_post r4, r1, r2
         ldrb   r3, [r1, r2]
         ldrb   ip, [r1, r2, lsl #2]
-        strb   r5, [r1], r2
+        strb_post r5, r1, r2
         ldrb   r7, [r1, r2]
         ldrb   r4, [r1, r2, lsl #2]
         add    r6, r3, r6, asr #COL_SHIFT
@@ -340,11 +341,11 @@ function idct_col_add_armv6
         usat   r8, #8, r8
         add    lr, r4, lr, asr #COL_SHIFT
         usat   lr, #8, lr
-        strb   r6, [r1], r2
-        strb   r10,[r1], r2
-        strb   r9, [r1], r2
-        strb   r8, [r1], r2
-        strb   lr, [r1], r2
+        strb_post r6, r1, r2
+        strb_post r10,r1, r2
+        strb_post r9, r1, r2
+        strb_post r8, r1, r2
+        strb_post lr, r1, r2
 
         sub    r1, r1, r2, lsl #3
 
diff --git a/libavcodec/arm/simple_idct_neon.S b/libavcodec/arm/simple_idct_neon.S
index 17cde5835a..64a7fbf13a 100644
--- a/libavcodec/arm/simple_idct_neon.S
+++ b/libavcodec/arm/simple_idct_neon.S
@@ -71,7 +71,7 @@ function idct_row4_pld_neon
         add             r3,  r0,  r1,  lsl #2
         pld             [r0, r1]
         pld             [r0, r1, lsl #1]
-        pld             [r3, -r1]
+A       pld             [r3, -r1]              @ ARM builds only; presumably dropped for Thumb because of the negative register offset - TODO confirm
         pld             [r3]
         pld             [r3, r1]
         add             r3,  r3,  r1,  lsl #1
@@ -164,6 +164,7 @@ function idct_col4_neon
         orrs            r4,  r4,  r5
 
         idct_col4_top
+        it              eq
         addeq           r2,  r2,  #16
         beq             1f
 
@@ -176,6 +177,7 @@ function idct_col4_neon
 
 1:      orrs            r6,  r6,  r7
         ldrd            r4,  [r2, #16]
+        it              eq
         addeq           r2,  r2,  #16
         beq             2f
 
@@ -187,6 +189,7 @@ function idct_col4_neon
 
 2:      orrs            r4,  r4,  r5
         ldrd            r4,  [r2, #16]
+        it              eq
         addeq           r2,  r2,  #16
         beq             3f
 
@@ -199,6 +202,7 @@ function idct_col4_neon
         vadd.i32        q13, q13, q8
 
 3:      orrs            r4,  r4,  r5
+        it              eq
         addeq           r2,  r2,  #16
         beq             4f
 
diff --git a/libavcodec/arm/synth_filter_neon.S b/libavcodec/arm/synth_filter_neon.S
index 1464abe562..3f91d67506 100644
--- a/libavcodec/arm/synth_filter_neon.S
+++ b/libavcodec/arm/synth_filter_neon.S
@@ -100,9 +100,11 @@ NOVFP   vldr            s0,  [sp, #12*4]        @ scale
         vst1.32         {q9},     [r2,:128]
 
         subs            r1,  r1,  #1
+        it              eq
         popeq           {r4-r11,pc}
 
         cmp             r4,  #0
+        itt             eq
         subeq           r8,  r8,  #512*4
         subeq           r9,  r9,  #512*4
         sub             r5,  r5,  #512*4
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
index cd02579e5b..ece9ac2a6c 100644
--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@@ -21,6 +21,14 @@
 #ifndef AVCODEC_ARM_VP56_ARITH_H
 #define AVCODEC_ARM_VP56_ARITH_H
 
+#if CONFIG_THUMB
+#   define A(x)       /* ARM-only asm line: dropped */
+#   define T(x) x     /* Thumb-only asm line: kept */
+#else
+#   define A(x) x     /* ARM-only asm line: kept */
+#   define T(x)       /* Thumb-only asm line: dropped */
+#endif /* CONFIG_THUMB */
+
 #if HAVE_ARMV6 && HAVE_INLINE_ASM
 
 #define vp56_rac_get_prob vp56_rac_get_prob_armv6
@@ -32,15 +40,21 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
     unsigned bit;
 
     __asm__ ("adds    %3,  %3,  %0           \n"
+             "itt     cs                     \n" /* cmpcs and the halfword load both run only on carry set */
              "cmpcs   %7,  %4                \n"
-             "ldrcsh  %2,  [%4], #2          \n"
+           A("ldrcsh  %2,  [%4], #2          \n") /* ARM build: condition written before the h suffix */
+           T("ldrhcs  %2,  [%4], #2          \n") /* Thumb build: condition written after the h suffix */
              "rsb     %0,  %6,  #256         \n"
              "smlabb  %0,  %5,  %6,  %0      \n"
+           T("itttt   cs                     \n") /* Thumb only: that path has four cs instructions (rev16, lsl, orr, sub), the ARM path three */
              "rev16cs %2,  %2                \n"
-             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+           T("lslcs   %2,  %2,  %3           \n") /* Thumb path: shift by register as a separate step ... */
+           T("orrcs   %1,  %1,  %2           \n") /* ... then merge into the code word */
+           A("orrcs   %1,  %1,  %2,  lsl %3  \n") /* ARM path: shift folded into the orr operand */
              "subcs   %3,  %3,  #16          \n"
              "lsr     %0,  %0,  #8           \n"
              "cmp     %1,  %0,  lsl #16      \n"
+             "ittte   ge                     \n" /* three ge instructions, then one slot with the inverse condition (lt) */
              "subge   %1,  %1,  %0,  lsl #16 \n"
              "subge   %0,  %5,  %0           \n"
              "movge   %2,  #1                \n"
@@ -64,12 +78,17 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
     unsigned tmp;
 
     __asm__ ("adds    %3,  %3,  %0           \n"
+             "itt     cs                     \n" /* cmpcs and the halfword load both run only on carry set */
              "cmpcs   %7,  %4                \n"
-             "ldrcsh  %2,  [%4], #2          \n"
+           A("ldrcsh  %2,  [%4], #2          \n") /* ARM build: condition written before the h suffix */
+           T("ldrhcs  %2,  [%4], #2          \n") /* Thumb build: condition written after the h suffix */
              "rsb     %0,  %6,  #256         \n"
              "smlabb  %0,  %5,  %6,  %0      \n"
+           T("itttt   cs                     \n") /* Thumb only: covers rev16cs, lslcs, orrcs and subcs */
              "rev16cs %2,  %2                \n"
-             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+           T("lslcs   %2,  %2,  %3           \n") /* Thumb path: shift by register as a separate step ... */
+           T("orrcs   %1,  %1,  %2           \n") /* ... then merge into the code word */
+           A("orrcs   %1,  %1,  %2,  lsl %3  \n") /* ARM path: shift folded into the orr operand */
              "subcs   %3,  %3,  #16          \n"
              "lsr     %0,  %0,  #8           \n"
              "lsl     %2,  %0,  #16          \n"
diff --git a/libavcodec/arm/vp8_armv6.S b/libavcodec/arm/vp8_armv6.S
index 93f4dd664b..b995360e0c 100644
--- a/libavcodec/arm/vp8_armv6.S
+++ b/libavcodec/arm/vp8_armv6.S
@@ -25,13 +25,18 @@
         lsl             \cw, \cw, \t0
         lsl             \t0, \h,  \t0
         rsb             \h,  \pr, #256
+        it              cs                      @ only the load: the smlabb that follows is unconditional
         ldrhcs          \t1, [\buf], #2
         smlabb          \h,  \t0, \pr, \h
+T       itttt           cs                      @ Thumb only: covers rev16cs, lslcs, orrcs and subcs
         rev16cs         \t1, \t1
-        orrcs           \cw, \cw, \t1, lsl \bs
+A       orrcs           \cw, \cw, \t1, lsl \bs  @ ARM path: shift folded into the orr operand
+T       lslcs           \t1, \t1, \bs           @ Thumb path: shift by register as a separate step
+T       orrcs           \cw, \cw, \t1
         subcs           \bs, \bs, #16
         lsr             \h,  \h,  #8
         cmp             \cw, \h,  lsl #16
+        itt             ge                      @ both subge below depend on the cmp just above
         subge           \cw, \cw, \h,  lsl #16
         subge           \h,  \t0, \h
 .endm
@@ -40,14 +45,20 @@
         adds            \bs, \bs, \t0
         lsl             \cw, \cw, \t0
         lsl             \t0, \h,  \t0
+        it              cs                      @ only the load: the mov that follows is unconditional
         ldrhcs          \t1, [\buf], #2
         mov             \h,  #128
+        it              cs                      @ only the rev16: the add that follows is unconditional
         rev16cs         \t1, \t1
         add             \h,  \h,  \t0, lsl #7
-        orrcs           \cw, \cw, \t1, lsl \bs
+A       orrcs           \cw, \cw, \t1, lsl \bs  @ ARM path: shift folded into the orr operand
+T       ittt            cs                      @ Thumb only: covers lslcs, orrcs and the shared subcs
+T       lslcs           \t1, \t1, \bs           @ Thumb path: shift by register as a separate step
+T       orrcs           \cw, \cw, \t1
         subcs           \bs, \bs, #16
         lsr             \h,  \h,  #8
         cmp             \cw, \h,  lsl #16
+        itt             ge                      @ both subge below depend on the cmp just above
         subge           \cw, \cw, \h,  lsl #16
         subge           \h,  \t0, \h
 .endm
@@ -59,6 +70,7 @@ function ff_decode_block_coeffs_armv6, export=1
         cmp             r3,  #0
         ldr             r11, [r5]
         ldm             r0,  {r5-r7}                    @ high, bits, buf
+        it              ne
         pkhtbne         r11, r11, r11, asr #16
         ldr             r8,  [r0, #16]                  @ code_word
 0:
@@ -80,19 +92,26 @@ function ff_decode_block_coeffs_armv6, export=1
         adds            r6,  r6,  r9
         add             r4,  r4,  #11
         lsl             r8,  r8,  r9
+        it              cs
         ldrhcs          r10, [r7], #2
         lsl             r9,  r5,  r9
         mov             r5,  #128
+        it              cs
         rev16cs         r10, r10
         add             r5,  r5,  r9,  lsl #7
-        orrcs           r8,  r8,  r10, lsl r6
+T       ittt            cs
+T       lslcs           r10, r10, r6
+T       orrcs           r8,  r8,  r10
+A       orrcs           r8,  r8,  r10, lsl r6
         subcs           r6,  r6,  #16
         lsr             r5,  r5,  #8
         cmp             r8,  r5,  lsl #16
         movrel          r10, zigzag_scan-1
+        itt             ge
         subge           r8,  r8,  r5,  lsl #16
         subge           r5,  r9,  r5
         ldrb            r10, [r10, r3]
+        it              ge
         rsbge           r12, r12, #0
         cmp             r3,  #16
         strh            r12, [r1, r10]
@@ -108,6 +127,7 @@ function ff_decode_block_coeffs_armv6, export=1
         ldr             r0,  [sp]
         ldr             r9,  [r0, #12]
         cmp             r7,  r9
+        it              hi
         movhi           r7,  r9
         stm             r0,  {r5-r7}                    @ high, bits, buf
         str             r8,  [r0, #16]                  @ code_word
@@ -131,11 +151,13 @@ function ff_decode_block_coeffs_armv6, export=1
         mov             r12, #2
         ldrb            r0,  [r4, #4]
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
+        it              ge
         addge           r12, #1
         ldrb            r9,  [lr, r5]
         blt             4f
         ldrb            r0,  [r4, #5]
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
+        it              ge
         addge           r12, #1
         ldrb            r9,  [lr, r5]
         b               4f
@@ -153,6 +175,7 @@ function ff_decode_block_coeffs_armv6, export=1
         mov             r12, #5
         mov             r0,  #159
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
+        it              ge
         addge           r12, r12, #1
         ldrb            r9,  [lr, r5]
         b               4f
@@ -160,23 +183,28 @@ function ff_decode_block_coeffs_armv6, export=1
         mov             r12, #7
         mov             r0,  #165
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
+        it              ge
         addge           r12, r12, #2
         ldrb            r9,  [lr, r5]
         mov             r0,  #145
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
+        it              ge
         addge           r12, r12, #1
         ldrb            r9,  [lr, r5]
         b               4f
 3:
         ldrb            r0,  [r4, #8]
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
+        it              ge
         addge           r4,  r4,  #1
         ldrb            r9,  [lr, r5]
+        ite             ge
         movge           r12, #2
         movlt           r12, #0
         ldrb            r0,  [r4, #9]
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
         mov             r9,  #8
+        it              ge
         addge           r12, r12, #1
         movrel          r4,  X(ff_vp8_dct_cat_prob)
         lsl             r9,  r9,  r12
@@ -189,6 +217,7 @@ function ff_decode_block_coeffs_armv6, export=1
         lsl             r1,  r1,  #1
         rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
         ldrb            r0,  [r4], #1
+        it              ge
         addge           r1,  r1,  #1
         cmp             r0,  #0
         bne             1b
@@ -200,6 +229,7 @@ function ff_decode_block_coeffs_armv6, export=1
         add             r4,  r2,  r4
         add             r4,  r4,  #22
         rac_get_128     r5,  r6,  r7,  r8,  r9,  r10
+        it              ge
         rsbge           r12, r12, #0
         smulbb          r12, r12, r11
         movrel          r9,  zigzag_scan-1
diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S
index 23330900f7..28487e7a60 100644
--- a/libavcodec/arm/vp8dsp_neon.S
+++ b/libavcodec/arm/vp8dsp_neon.S
@@ -746,14 +746,14 @@ function ff_put_vp8_pixels4_neon, export=1
         push            {r4-r6,lr}
 1:
         subs            r12, r12, #4
-        ldr             r4,       [r2], r3
-        ldr             r5,       [r2], r3
-        ldr             r6,       [r2], r3
-        ldr             lr,       [r2], r3
-        str             r4,       [r0], r1
-        str             r5,       [r0], r1
-        str             r6,       [r0], r1
-        str             lr,       [r0], r1
+        ldr_post        r4,  r2,  r3
+        ldr_post        r5,  r2,  r3
+        ldr_post        r6,  r2,  r3
+        ldr_post        lr,  r2,  r3
+        str_post        r4,  r0,  r1
+        str_post        r5,  r0,  r1
+        str_post        r6,  r0,  r1
+        str_post        lr,  r0,  r1
         bgt             1b
         pop             {r4-r6,pc}
 endfunc
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 90c389b8c5..42eabdd623 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3320,7 +3320,7 @@ void av_resample_close(struct AVResampleContext *c);
 /**
  * Allocate memory for a picture.  Call avpicture_free() to free it.
  *
- * \see avpicture_fill()
+ * @see avpicture_fill()
  *
  * @param picture the picture to be filled in
  * @param pix_fmt the format of the picture
@@ -3367,7 +3367,7 @@ int avpicture_fill(AVPicture *picture, uint8_t *ptr,
  * The data is stored compactly, without any gaps for alignment or padding
  * which may be applied by avpicture_fill().
  *
- * \see avpicture_get_size()
+ * @see avpicture_get_size()
  *
  * @param[in] src AVPicture containing image data
  * @param[in] pix_fmt The format in which the picture data is stored.
@@ -3964,7 +3964,7 @@ typedef struct AVCodecParserContext {
     int64_t offset;      ///< byte offset from starting packet start
     int64_t cur_frame_end[AV_PARSER_PTS_NB];
 
-    /*!
+    /**
      * Set by parser to 1 for key frames and 0 for non-key frames.
      * It is initialized to -1, so if the parser doesn't set this flag,
      * old-style fallback using AV_PICTURE_TYPE_I picture type as key frames
@@ -4211,7 +4211,7 @@ void av_log_missing_feature(void *avc, const char *feature, int want_sample);
  * a pointer to an AVClass struct
  * @param[in] msg string containing an optional message, or NULL if no message
  */
-void av_log_ask_for_sample(void *avc, const char *msg, ...);
+void av_log_ask_for_sample(void *avc, const char *msg, ...) av_printf_format(2, 3);
 
 /**
  * Register the hardware accelerator hwaccel.
diff --git a/libavcodec/celp_filters.h b/libavcodec/celp_filters.h
index 145e3d3346..2fb2b03aaa 100644
--- a/libavcodec/celp_filters.h
+++ b/libavcodec/celp_filters.h
@@ -34,7 +34,7 @@
  *
  *  fc_out[n] = sum(i,0,len-1){ fc_in[i] * filter[(len + n - i)%len] }
  *
- * \note fc_in and fc_out should not overlap!
+ * @note fc_in and fc_out should not overlap!
  */
 void ff_celp_convolve_circ(int16_t *fc_out, const int16_t *fc_in,
                            const int16_t *filter, int len);
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 24db7e3d24..0e19e947b1 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -119,7 +119,7 @@ extern COSTABLE_CONST FFTSample* const FFT_NAME(ff_cos_tabs)[17];
 
 /**
  * Initialize the cosine table in ff_cos_tabs[index]
- * \param index index in ff_cos_tabs array of the table to initialize
+ * @param index index in ff_cos_tabs array of the table to initialize
  */
 void ff_init_ff_cos_tabs(int index);
 
diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c
index 32db0597e3..c4a883f392 100644
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@@ -116,7 +116,7 @@ static const G729FormatDescription format_g729d_6k4 = {
 };
 
 /**
- * \brief pseudo random number generator
+ * @brief pseudo random number generator
  */
 static inline uint16_t g729_prng(uint16_t value)
 {
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 080b6a93b5..27fba4b628 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -97,7 +97,7 @@ found:
     return i-(state&5);
 }
 
-/*!
+/**
  * Parse NAL units of found picture and decode some basic information.
  *
  * @param s parser context.
diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 02d3533b0c..5cff77f58c 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -32,25 +32,25 @@
 #include "lagarithrac.h"
 
 enum LagarithFrameType {
-    FRAME_RAW           = 1,    /*!< uncompressed */
-    FRAME_U_RGB24       = 2,    /*!< unaligned RGB24 */
-    FRAME_ARITH_YUY2    = 3,    /*!< arithmetic coded YUY2 */
-    FRAME_ARITH_RGB24   = 4,    /*!< arithmetic coded RGB24 */
-    FRAME_SOLID_GRAY    = 5,    /*!< solid grayscale color frame */
-    FRAME_SOLID_COLOR   = 6,    /*!< solid non-grayscale color frame */
-    FRAME_OLD_ARITH_RGB = 7,    /*!< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
-    FRAME_ARITH_RGBA    = 8,    /*!< arithmetic coded RGBA */
-    FRAME_SOLID_RGBA    = 9,    /*!< solid RGBA color frame */
-    FRAME_ARITH_YV12    = 10,   /*!< arithmetic coded YV12 */
-    FRAME_REDUCED_RES   = 11,   /*!< reduced resolution YV12 frame */
+    FRAME_RAW           = 1,    /**< uncompressed */
+    FRAME_U_RGB24       = 2,    /**< unaligned RGB24 */
+    FRAME_ARITH_YUY2    = 3,    /**< arithmetic coded YUY2 */
+    FRAME_ARITH_RGB24   = 4,    /**< arithmetic coded RGB24 */
+    FRAME_SOLID_GRAY    = 5,    /**< solid grayscale color frame */
+    FRAME_SOLID_COLOR   = 6,    /**< solid non-grayscale color frame */
+    FRAME_OLD_ARITH_RGB = 7,    /**< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
+    FRAME_ARITH_RGBA    = 8,    /**< arithmetic coded RGBA */
+    FRAME_SOLID_RGBA    = 9,    /**< solid RGBA color frame */
+    FRAME_ARITH_YV12    = 10,   /**< arithmetic coded YV12 */
+    FRAME_REDUCED_RES   = 11,   /**< reduced resolution YV12 frame */
 };
 
 typedef struct LagarithContext {
     AVCodecContext *avctx;
     AVFrame picture;
     DSPContext dsp;
-    int zeros;                  /*!< number of consecutive zero bytes encountered */
-    int zeros_rem;              /*!< number of zero bytes remaining to output */
+    int zeros;                  /**< number of consecutive zero bytes encountered */
+    int zeros_rem;              /**< number of zero bytes remaining to output */
 } LagarithContext;
 
 /**
diff --git a/libavcodec/lagarithrac.h b/libavcodec/lagarithrac.h
index 2cb7323076..8c78538f21 100644
--- a/libavcodec/lagarithrac.h
+++ b/libavcodec/lagarithrac.h
@@ -40,15 +40,15 @@ typedef struct lag_rac {
     AVCodecContext *avctx;
     unsigned low;
     unsigned range;
-    unsigned scale;             /*!< Number of bits of precision in range. */
-    unsigned hash_shift;        /*!< Number of bits to shift to calculate hash for radix search. */
+    unsigned scale;             /**< Number of bits of precision in range. */
+    unsigned hash_shift;        /**< Number of bits to shift to calculate hash for radix search. */
 
-    const uint8_t *bytestream_start;  /*!< Start of input bytestream. */
-    const uint8_t *bytestream;        /*!< Current position in input bytestream. */
-    const uint8_t *bytestream_end;    /*!< End position of input bytestream. */
+    const uint8_t *bytestream_start;  /**< Start of input bytestream. */
+    const uint8_t *bytestream;        /**< Current position in input bytestream. */
+    const uint8_t *bytestream_end;    /**< End position of input bytestream. */
 
-    uint32_t prob[258];         /*!< Table of cumulative probability for each symbol. */
-    uint8_t  range_hash[256];   /*!< Hash table mapping upper byte to approximate symbol. */
+    uint32_t prob[258];         /**< Table of cumulative probability for each symbol. */
+    uint8_t  range_hash[256];   /**< Hash table mapping upper byte to approximate symbol. */
 } lag_rac;
 
 void lag_rac_init(lag_rac *l, GetBitContext *gb, int length);
diff --git a/libavcodec/lcldec.c b/libavcodec/lcldec.c
index 57735ac6ff..7359864004 100644
--- a/libavcodec/lcldec.c
+++ b/libavcodec/lcldec.c
@@ -73,8 +73,8 @@ typedef struct LclDecContext {
 
 
 /**
- * \param srcptr compressed source buffer, must be padded with at least 5 extra bytes
- * \param destptr must be padded sufficiently for av_memcpy_backptr
+ * @param srcptr compressed source buffer, must be padded with at least 5 extra bytes
+ * @param destptr must be padded sufficiently for av_memcpy_backptr
  */
 static unsigned int mszh_decomp(const unsigned char * srcptr, int srclen, unsigned char * destptr, unsigned int destsize)
 {
@@ -119,11 +119,11 @@ static unsigned int mszh_decomp(const unsigned char * srcptr, int srclen, unsign
 
 #if CONFIG_ZLIB_DECODER
 /**
- * \brief decompress a zlib-compressed data block into decomp_buf
- * \param src compressed input buffer
- * \param src_len data length in input buffer
- * \param offset offset in decomp_buf
- * \param expected expected decompressed length
+ * @brief decompress a zlib-compressed data block into decomp_buf
+ * @param src compressed input buffer
+ * @param src_len data length in input buffer
+ * @param offset offset in decomp_buf
+ * @param expected expected decompressed length
  */
 static int zlib_decomp(AVCodecContext *avctx, const uint8_t *src, int src_len, int offset, int expected)
 {
diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c
index 98ca490a76..0ff0f0986a 100644
--- a/libavcodec/lsp.c
+++ b/libavcodec/lsp.c
@@ -74,9 +74,9 @@ void ff_acelp_lsf2lspd(double *lsp, const float *lsf, int lp_order)
 }
 
 /**
- * \brief decodes polynomial coefficients from LSP
- * \param f [out] decoded polynomial coefficients (-0x20000000 <= (3.22) <= 0x1fffffff)
- * \param lsp LSP coefficients (-0x8000 <= (0.15) <= 0x7fff)
+ * @brief decodes polynomial coefficients from LSP
+ * @param[out] f decoded polynomial coefficients (-0x20000000 <= (3.22) <= 0x1fffffff)
+ * @param lsp LSP coefficients (-0x8000 <= (0.15) <= 0x7fff)
  */
 static void lsp2poly(int* f, const int16_t* lsp, int lp_half_order)
 {
diff --git a/libavcodec/lsp.h b/libavcodec/lsp.h
index e3af30d300..1230669b1a 100644
--- a/libavcodec/lsp.h
+++ b/libavcodec/lsp.h
@@ -30,12 +30,12 @@
 */
 
 /**
- * \brief ensure a minimum distance between LSFs
- * \param[in,out] lsfq LSF to check and adjust
- * \param lsfq_min_distance minimum distance between LSFs
- * \param lsfq_min minimum allowed LSF value
- * \param lsfq_max maximum allowed LSF value
- * \param lp_order LP filter order
+ * @brief ensure a minimum distance between LSFs
+ * @param[in,out] lsfq LSF to check and adjust
+ * @param lsfq_min_distance minimum distance between LSFs
+ * @param lsfq_min minimum allowed LSF value
+ * @param lsfq_max maximum allowed LSF value
+ * @param lp_order LP filter order
  */
 void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, int lsfq_max, int lp_order);
 
@@ -53,12 +53,12 @@ void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, in
 void ff_set_min_dist_lsf(float *lsf, double min_spacing, int size);
 
 /**
- * \brief Convert LSF to LSP
- * \param[out] lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
- * \param lsf normalized LSF coefficients (0 <= (2.13) < 0x2000 * PI)
- * \param lp_order LP filter order
+ * @brief Convert LSF to LSP
+ * @param[out] lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
+ * @param lsf normalized LSF coefficients (0 <= (2.13) < 0x2000 * PI)
+ * @param lp_order LP filter order
  *
- * \remark It is safe to pass the same array into the lsf and lsp parameters.
+ * @remark It is safe to pass the same array into the lsf and lsp parameters.
  */
 void ff_acelp_lsf2lsp(int16_t *lsp, const int16_t *lsf, int lp_order);
 
@@ -68,10 +68,10 @@ void ff_acelp_lsf2lsp(int16_t *lsp, const int16_t *lsf, int lp_order);
 void ff_acelp_lsf2lspd(double *lsp, const float *lsf, int lp_order);
 
 /**
- * \brief LSP to LP conversion (3.2.6 of G.729)
- * \param[out] lp decoded LP coefficients (-0x8000 <= (3.12) < 0x8000)
- * \param lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
- * \param lp_half_order LP filter order, divided by 2
+ * @brief LSP to LP conversion (3.2.6 of G.729)
+ * @param[out] lp decoded LP coefficients (-0x8000 <= (3.12) < 0x8000)
+ * @param lsp LSP coefficients (-0x8000 <= (0.15) < 0x8000)
+ * @param lp_half_order LP filter order, divided by 2
  */
 void ff_acelp_lsp2lpc(int16_t* lp, const int16_t* lsp, int lp_half_order);
 
@@ -81,12 +81,12 @@ void ff_acelp_lsp2lpc(int16_t* lp, const int16_t* lsp, int lp_half_order);
 void ff_amrwb_lsp2lpc(const double *lsp, float *lp, int lp_order);
 
 /**
- * \brief Interpolate LSP for the first subframe and convert LSP -> LP for both subframes (3.2.5 and 3.2.6 of G.729)
- * \param[out] lp_1st decoded LP coefficients for first subframe  (-0x8000 <= (3.12) < 0x8000)
- * \param[out] lp_2nd decoded LP coefficients for second subframe (-0x8000 <= (3.12) < 0x8000)
- * \param lsp_2nd LSP coefficients of the second subframe (-0x8000 <= (0.15) < 0x8000)
- * \param lsp_prev LSP coefficients from the second subframe of the previous frame (-0x8000 <= (0.15) < 0x8000)
- * \param lp_order LP filter order
+ * @brief Interpolate LSP for the first subframe and convert LSP -> LP for both subframes (3.2.5 and 3.2.6 of G.729)
+ * @param[out] lp_1st decoded LP coefficients for first subframe  (-0x8000 <= (3.12) < 0x8000)
+ * @param[out] lp_2nd decoded LP coefficients for second subframe (-0x8000 <= (3.12) < 0x8000)
+ * @param lsp_2nd LSP coefficients of the second subframe (-0x8000 <= (0.15) < 0x8000)
+ * @param lsp_prev LSP coefficients from the second subframe of the previous frame (-0x8000 <= (0.15) < 0x8000)
+ * @param lp_order LP filter order
  */
 void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd, const int16_t* lsp_prev, int lp_order);
 
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index c12ebf4c7c..46068bfe3d 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -248,7 +248,7 @@ static int cmp_internal(MpegEncContext *s, const int x, const int y, const int s
     }
 }
 
-/*! \brief compares a block (either a full macroblock or a partition thereof)
+/** @brief compares a block (either a full macroblock or a partition thereof)
     against a proposed motion-compensated prediction of that block
  */
 static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index 461e85932b..5d319c5da2 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -992,8 +992,8 @@ static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dm
         return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
 }
 
-/*!
-   \param P[10][2] a list of candidate mvs to check before starting the
+/**
+   @param P[10][2] a list of candidate mvs to check before starting the
    iterative search. If one of the candidates is close to the optimal mv, then
    it takes fewer iterations. And it increases the chance that we find the
    optimal mv.
@@ -1003,12 +1003,12 @@ static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int
                              int ref_mv_scale, int flags, int size, int h)
 {
     MotionEstContext * const c= &s->me;
-    int best[2]={0, 0};      /*!< x and y coordinates of the best motion vector.
+    int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
                                i.e. the difference between the position of the
                                block currently being encoded and the position of
                                the block chosen to predict it from. */
     int d;                   ///< the score (cmp + penalty) of any given mv
-    int dmin;                /*!< the best value of d, i.e. the score
+    int dmin;                /**< the best value of d, i.e. the score
                                corresponding to the mv stored in best[]. */
     int map_generation;
     int penalty_factor;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index a0ff354a08..2a54329a49 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -154,7 +154,7 @@ typedef struct MotionEstContext{
     uint32_t *score_map;               ///< map to store the scores
     int map_generation;
     int pre_penalty_factor;
-    int penalty_factor;                /*!< an estimate of the bits required to
+    int penalty_factor;                /**< an estimate of the bits required to
                                         code a given mv value, e.g. (1,0) takes
                                         more bits than (0,0). We have to
                                         estimate whether any reduction in
diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index 6eb6de3101..f31be59d8d 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c
@@ -63,11 +63,11 @@ static const uint8_t fallback_cquant[] = {
 };
 
 /**
- * \brief copy frame data from buffer to AVFrame, handling stride.
- * \param f destination AVFrame
- * \param src source buffer, does not use any line-stride
- * \param width width of the video frame
- * \param height height of the video frame
+ * @brief copy frame data from buffer to AVFrame, handling stride.
+ * @param f destination AVFrame
+ * @param src source buffer, does not use any line-stride
+ * @param width width of the video frame
+ * @param height height of the video frame
  */
 static void copy_frame(AVFrame *f, const uint8_t *src,
                        int width, int height) {
@@ -77,7 +77,7 @@ static void copy_frame(AVFrame *f, const uint8_t *src,
 }
 
 /**
- * \brief extract quantization tables from codec data into our context
+ * @brief extract quantization tables from codec data into our context
  */
 static int get_quant(AVCodecContext *avctx, NuvContext *c,
                      const uint8_t *buf, int size) {
@@ -94,7 +94,7 @@ static int get_quant(AVCodecContext *avctx, NuvContext *c,
 }
 
 /**
- * \brief set quantization tables from a quality value
+ * @brief set quantization tables from a quality value
  */
 static void get_quant_quality(NuvContext *c, int quality) {
     int i;
diff --git a/libavcodec/qcelpdata.h b/libavcodec/qcelpdata.h
index d79cea9f6c..82cc61d7ea 100644
--- a/libavcodec/qcelpdata.h
+++ b/libavcodec/qcelpdata.h
@@ -74,9 +74,9 @@ typedef struct {
 static const float qcelp_hammsinc_table[4] = { -0.006822,  0.041249, -0.143459,  0.588863};
 
 typedef struct {
-    uint8_t index;  /*!< index into the QCELPContext structure */
-    uint8_t bitpos; /*!< position of the lowest bit in the value's byte */
-    uint8_t bitlen; /*!< number of bits to read */
+    uint8_t index;  /**< index into the QCELPContext structure */
+    uint8_t bitpos; /**< position of the lowest bit in the value's byte */
+    uint8_t bitlen; /**< number of bits to read */
 } QCELPBitmap;
 
 #define QCELP_OF(variable, bit, len) {offsetof(QCELPFrame, variable), bit, len}
diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index 3ed821c81e..d565003a9e 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -46,7 +46,7 @@
 
 typedef enum
 {
-    I_F_Q = -1,    /*!< insufficient frame quality */
+    I_F_Q = -1,    /**< insufficient frame quality */
     SILENCE,
     RATE_OCTAVE,
     RATE_QUARTER,
@@ -58,12 +58,12 @@ typedef struct
 {
     GetBitContext     gb;
     qcelp_packet_rate bitrate;
-    QCELPFrame        frame;    /*!< unpacked data frame */
+    QCELPFrame        frame;    /**< unpacked data frame */
 
     uint8_t  erasure_count;
-    uint8_t  octave_count;      /*!< count the consecutive RATE_OCTAVE frames */
+    uint8_t  octave_count;      /**< count the consecutive RATE_OCTAVE frames */
     float    prev_lspf[10];
-    float    predictor_lspf[10];/*!< LSP predictor for RATE_OCTAVE and I_F_Q */
+    float    predictor_lspf[10];/**< LSP predictor for RATE_OCTAVE and I_F_Q */
     float    pitch_synthesis_filter_mem[303];
     float    pitch_pre_filter_mem[303];
     float    rnd_fir_filter_mem[180];
diff --git a/libavcodec/rtjpeg.c b/libavcodec/rtjpeg.c
index 4c48f25b2c..303183f230 100644
--- a/libavcodec/rtjpeg.c
+++ b/libavcodec/rtjpeg.c
@@ -33,12 +33,12 @@
     if (n) {skip_bits(gb, n);}
 
 /**
- * \brief read one block from stream
- * \param gb contains stream data
- * \param block where data is written to
- * \param scan array containing the mapping stream address -> block position
- * \param quant quantization factors
- * \return 0 means the block is not coded, < 0 means an error occurred.
+ * @brief read one block from stream
+ * @param gb contains stream data
+ * @param block where data is written to
+ * @param scan array containing the mapping stream address -> block position
+ * @param quant quantization factors
+ * @return 0 means the block is not coded, < 0 means an error occurred.
  *
  * Note: GetBitContext is used to make the code simpler, since all data is
  * aligned this could be done faster in a different way, e.g. as it is done
@@ -96,13 +96,13 @@ static inline int get_block(GetBitContext *gb, DCTELEM *block, const uint8_t *sc
 }
 
 /**
- * \brief decode one rtjpeg YUV420 frame
- * \param c context, must be initialized via rtjpeg_decode_init
- * \param f AVFrame to place decoded frame into. If parts of the frame
+ * @brief decode one rtjpeg YUV420 frame
+ * @param c context, must be initialized via rtjpeg_decode_init
+ * @param f AVFrame to place decoded frame into. If parts of the frame
  *          are not coded they are left unchanged, so consider initializing it
- * \param buf buffer containing input data
- * \param buf_size length of input data in bytes
- * \return number of bytes consumed from the input buffer
+ * @param buf buffer containing input data
+ * @param buf_size length of input data in bytes
+ * @return number of bytes consumed from the input buffer
  */
 int rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
                                const uint8_t *buf, int buf_size) {
@@ -143,15 +143,15 @@ int rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
 }
 
 /**
- * \brief initialize an RTJpegContext, may be called multiple times
- * \param c context to initialize
- * \param dsp specifies the idct to use for decoding
- * \param width width of image, will be rounded down to the nearest multiple
+ * @brief initialize an RTJpegContext, may be called multiple times
+ * @param c context to initialize
+ * @param dsp specifies the idct to use for decoding
+ * @param width width of image, will be rounded down to the nearest multiple
  *              of 16 for decoding
- * \param height height of image, will be rounded down to the nearest multiple
+ * @param height height of image, will be rounded down to the nearest multiple
  *              of 16 for decoding
- * \param lquant luma quantization table to use
- * \param cquant chroma quantization table to use
+ * @param lquant luma quantization table to use
+ * @param cquant chroma quantization table to use
  */
 void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp,
                         int width, int height,
diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index d81b9a387b..d3e4dd956f 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -56,7 +56,7 @@ void write_##type##_2d_array(const void *arg, int len, int len2)\
 }
 
 /**
- * \defgroup printfuncs Predefined functions for printing tables
+ * @defgroup printfuncs Predefined functions for printing tables
  *
  * \{
  */
diff --git a/libavcodec/twinvq.c b/libavcodec/twinvq.c
index f8e75bb933..e7aceebd5b 100644
--- a/libavcodec/twinvq.c
+++ b/libavcodec/twinvq.c
@@ -411,7 +411,7 @@ static inline float mulawinv(float y, float clip, float mu)
  * a*b == 200 and the nearest integer is ill-defined, use a table to emulate
  * the following broken float-based implementation used by the binary decoder:
  *
- * \code
+ * @code
  * static int very_broken_op(int a, int b)
  * {
  *    static float test; // Ugh, force gcc to do the division first...
@@ -419,7 +419,7 @@ static inline float mulawinv(float y, float clip, float mu)
  *    test = a/400.;
  *    return b * test +  0.5;
  * }
- * \endcode
+ * @endcode
  *
  * @note if this function is replaced by just ROUNDED_DIV(a*b,400.), the stddev
  * between the original file (before encoding with Yamaha encoder) and the
@@ -938,14 +938,14 @@ static void permutate_in_line(int16_t *tab, int num_vect, int num_blocks,
 /**
  * Interpret the input data as in the following table:
  *
- * \verbatim
+ * @verbatim
  *
  * abcdefgh
  * ijklmnop
  * qrstuvw
  * x123456
  *
- * \endverbatim
+ * @endverbatim
  *
  * and transpose it, giving the output
  * aiqxbjr1cks2dlt3emu4fvn5gow6hp
diff --git a/libavcodec/vaapi.c b/libavcodec/vaapi.c
index de028a0a7e..774fde840f 100644
--- a/libavcodec/vaapi.c
+++ b/libavcodec/vaapi.c
@@ -24,7 +24,7 @@
 #include "vaapi_internal.h"
 
 /**
- * \addtogroup VAAPI_Decoding
+ * @addtogroup VAAPI_Decoding
  *
  * @{
  */
diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h
index 07568a47fc..4c3bb9bb52 100644
--- a/libavcodec/vaapi.h
+++ b/libavcodec/vaapi.h
@@ -27,8 +27,8 @@
 #include <stdint.h>
 
 /**
- * \defgroup VAAPI_Decoding VA API Decoding
- * \ingroup Decoder
+ * @defgroup VAAPI_Decoding VA API Decoding
+ * @ingroup Decoder
  * @{
  */
 
diff --git a/libavcodec/vaapi_internal.h b/libavcodec/vaapi_internal.h
index 2c0fdf945e..43fa889d15 100644
--- a/libavcodec/vaapi_internal.h
+++ b/libavcodec/vaapi_internal.h
@@ -30,7 +30,7 @@
 #include "mpegvideo.h"
 
 /**
- * \addtogroup VAAPI_Decoding
+ * @addtogroup VAAPI_Decoding
  *
  * @{
  */
diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c
index 19bd96bc15..9fbcbf9a3f 100644
--- a/libavcodec/vdpau.c
+++ b/libavcodec/vdpau.c
@@ -33,7 +33,7 @@
 #include "vdpau_internal.h"
 
 /**
- * \addtogroup VDPAU_Decoding
+ * @addtogroup VDPAU_Decoding
  *
  * @{
  */
diff --git a/libavcodec/vdpau.h b/libavcodec/vdpau.h
index 0dc6fb850b..f3a547184d 100644
--- a/libavcodec/vdpau.h
+++ b/libavcodec/vdpau.h
@@ -25,7 +25,7 @@
 #define AVCODEC_VDPAU_H
 
 /**
- * \defgroup Decoder VDPAU Decoder and Renderer
+ * @defgroup Decoder VDPAU Decoder and Renderer
  *
  * VDPAU hardware acceleration has two modules
  * - VDPAU decoding
@@ -38,25 +38,25 @@
  * and rendering (API calls) are done as part of the VDPAU
  * presentation (vo_vdpau.c) module.
  *
- * \defgroup  VDPAU_Decoding VDPAU Decoding
- * \ingroup Decoder
+ * @defgroup  VDPAU_Decoding VDPAU Decoding
+ * @ingroup Decoder
  * @{
  */
 
 #include <vdpau/vdpau.h>
 #include <vdpau/vdpau_x11.h>
 
-/** \brief The videoSurface is used for rendering. */
+/** @brief The videoSurface is used for rendering. */
 #define FF_VDPAU_STATE_USED_FOR_RENDER 1
 
 /**
- * \brief The videoSurface is needed for reference/prediction.
+ * @brief The videoSurface is needed for reference/prediction.
  * The codec manipulates this.
  */
 #define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
 
 /**
- * \brief This structure is used as a callback between the FFmpeg
+ * @brief This structure is used as a callback between the FFmpeg
  * decoder (vd_) and presentation (vo_) module.
  * This is used for defining a video frame containing surface,
  * picture parameter, bitstream information etc which are passed
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index 5185d61e54..fc75a57519 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -43,7 +43,7 @@
 #include "idct_xvid.h"
 #include "dsputil_mmx.h"
 
-/*!
+/**
  * @file
  * @brief SSE2 idct compatible with xvidmmx
  */
diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h
index 5fdc20d3ea..be91d1c68a 100644
--- a/libavcodec/x86/idct_xvid.h
+++ b/libavcodec/x86/idct_xvid.h
@@ -18,7 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-/*!
+/**
  * @file
  * header for Xvid IDCT functions
  */
diff --git a/libavcodec/xsubenc.c b/libavcodec/xsubenc.c
index a7e3a891d4..0e950d1856 100644
--- a/libavcodec/xsubenc.c
+++ b/libavcodec/xsubenc.c
@@ -36,8 +36,8 @@
 
 /**
  * Encode a single color run. At most 16 bits will be used.
- * \param len   length of the run, values > 255 mean "until end of line", may not be < 0.
- * \param color color to encode, only the lowest two bits are used and all others must be 0.
+ * @param len   length of the run, values > 255 mean "until end of line", may not be < 0.
+ * @param color color to encode, only the lowest two bits are used and all others must be 0.
  */
 static void put_xsub_rle(PutBitContext *pb, int len, int color)
 {
diff --git a/libavdevice/alsa-audio-common.c b/libavdevice/alsa-audio-common.c
index 668ca02201..8c5be3c864 100644
--- a/libavdevice/alsa-audio-common.c
+++ b/libavdevice/alsa-audio-common.c
@@ -30,6 +30,7 @@
 
 #include <alsa/asoundlib.h>
 #include "avdevice.h"
+#include "libavutil/avassert.h"
 
 #include "alsa-audio.h"
 
@@ -64,7 +65,7 @@ static av_cold snd_pcm_format_t codec_id_to_pcm_format(int codec_id)
 static void alsa_reorder_ ## NAME ## _out_50(const void *in_v, void *out_v, int n) \
 { \
     const TYPE *in = in_v; \
-    TYPE * out = out_v; \
+    TYPE      *out = out_v; \
 \
     while (n-- > 0) { \
         out[0] = in[0]; \
@@ -81,7 +82,7 @@ static void alsa_reorder_ ## NAME ## _out_50(const void *in_v, void *out_v, int
 static void alsa_reorder_ ## NAME ## _out_51(const void *in_v, void *out_v, int n) \
 { \
     const TYPE *in = in_v; \
-    TYPE * out = out_v; \
+    TYPE      *out = out_v; \
 \
     while (n-- > 0) { \
         out[0] = in[0]; \
@@ -99,7 +100,7 @@ static void alsa_reorder_ ## NAME ## _out_51(const void *in_v, void *out_v, int
 static void alsa_reorder_ ## NAME ## _out_71(const void *in_v, void *out_v, int n) \
 { \
     const TYPE *in = in_v; \
-    TYPE * out = out_v; \
+    TYPE      *out = out_v; \
 \
     while (n-- > 0) { \
         out[0] = in[0]; \
@@ -128,57 +129,57 @@ REORDER_OUT_50(f32, float)
 REORDER_OUT_51(f32, float)
 REORDER_OUT_71(f32, float)
 
-#define REORDER_DUMMY ((void *)1)
+#define FORMAT_I8  0
+#define FORMAT_I16 1
+#define FORMAT_I32 2
+#define FORMAT_F32 3
+
+#define PICK_REORDER(layout)\
+switch(format) {\
+    case FORMAT_I8:  s->reorder_func = alsa_reorder_int8_out_ ##layout;  break;\
+    case FORMAT_I16: s->reorder_func = alsa_reorder_int16_out_ ##layout; break;\
+    case FORMAT_I32: s->reorder_func = alsa_reorder_int32_out_ ##layout; break;\
+    case FORMAT_F32: s->reorder_func = alsa_reorder_f32_out_ ##layout;   break;\
+}
 
-static av_cold ff_reorder_func find_reorder_func(int codec_id,
-                                                 int64_t layout,
-                                                 int out)
+static av_cold int find_reorder_func(AlsaData *s, int codec_id, int64_t layout, int out)
 {
-    return
-    codec_id == CODEC_ID_PCM_U8   || codec_id == CODEC_ID_PCM_S8 ||
-    codec_id == CODEC_ID_PCM_ALAW || codec_id == CODEC_ID_PCM_MULAW ?
-        layout == AV_CH_LAYOUT_QUAD || layout == AV_CH_LAYOUT_2_2 ?
-            REORDER_DUMMY :
-        layout == AV_CH_LAYOUT_5POINT0_BACK || layout == AV_CH_LAYOUT_5POINT0 ?
-            out ? alsa_reorder_int8_out_50 : NULL :
-        layout == AV_CH_LAYOUT_5POINT1_BACK || layout == AV_CH_LAYOUT_5POINT1 ?
-            out ? alsa_reorder_int8_out_51 : NULL :
-        layout == AV_CH_LAYOUT_7POINT1 ?
-            out ? alsa_reorder_int8_out_71 : NULL :
-            NULL :
-    codec_id == CODEC_ID_PCM_U16LE || codec_id == CODEC_ID_PCM_U16BE ||
-    codec_id == CODEC_ID_PCM_S16LE || codec_id == CODEC_ID_PCM_S16BE ?
-        layout == AV_CH_LAYOUT_QUAD || layout == AV_CH_LAYOUT_2_2 ?
-            REORDER_DUMMY :
-        layout == AV_CH_LAYOUT_5POINT0_BACK || layout == AV_CH_LAYOUT_5POINT0 ?
-            out ? alsa_reorder_int16_out_50 : NULL :
-        layout == AV_CH_LAYOUT_5POINT1_BACK || layout == AV_CH_LAYOUT_5POINT1 ?
-            out ? alsa_reorder_int16_out_51 : NULL :
-        layout == AV_CH_LAYOUT_7POINT1 ?
-            out ? alsa_reorder_int16_out_71 : NULL :
-            NULL :
-    codec_id == CODEC_ID_PCM_U32LE || codec_id == CODEC_ID_PCM_U32BE ||
-    codec_id == CODEC_ID_PCM_S32LE || codec_id == CODEC_ID_PCM_S32BE ?
-        layout == AV_CH_LAYOUT_QUAD || layout == AV_CH_LAYOUT_2_2 ?
-            REORDER_DUMMY :
-        layout == AV_CH_LAYOUT_5POINT0_BACK || layout == AV_CH_LAYOUT_5POINT0 ?
-            out ? alsa_reorder_int32_out_50 : NULL :
-        layout == AV_CH_LAYOUT_5POINT1_BACK || layout == AV_CH_LAYOUT_5POINT1 ?
-            out ? alsa_reorder_int32_out_51 : NULL :
-        layout == AV_CH_LAYOUT_7POINT1 ?
-            out ? alsa_reorder_int32_out_71 : NULL :
-            NULL :
-    codec_id == CODEC_ID_PCM_F32LE || codec_id == CODEC_ID_PCM_F32BE ?
-        layout == AV_CH_LAYOUT_QUAD || layout == AV_CH_LAYOUT_2_2 ?
-            REORDER_DUMMY :
-        layout == AV_CH_LAYOUT_5POINT0_BACK || layout == AV_CH_LAYOUT_5POINT0 ?
-            out ? alsa_reorder_f32_out_50 : NULL :
-        layout == AV_CH_LAYOUT_5POINT1_BACK || layout == AV_CH_LAYOUT_5POINT1 ?
-            out ? alsa_reorder_f32_out_51 : NULL :
-        layout == AV_CH_LAYOUT_7POINT1 ?
-            out ? alsa_reorder_f32_out_71 : NULL :
-            NULL :
-        NULL;
+    int format;
+
+    /* QUAD and 2_2 need no reordering in either direction; test before !out */
+    if (layout == AV_CH_LAYOUT_QUAD || layout == AV_CH_LAYOUT_2_2)
+        return 0;
+
+    /* reordering input is not currently supported */
+    if (!out)
+        return AVERROR(ENOSYS);
+
+    switch (codec_id) {
+    case CODEC_ID_PCM_S8:
+    case CODEC_ID_PCM_U8:
+    case CODEC_ID_PCM_ALAW:
+    case CODEC_ID_PCM_MULAW: format = FORMAT_I8;  break;
+    case CODEC_ID_PCM_S16LE:
+    case CODEC_ID_PCM_S16BE:
+    case CODEC_ID_PCM_U16LE:
+    case CODEC_ID_PCM_U16BE: format = FORMAT_I16; break;
+    case CODEC_ID_PCM_S32LE:
+    case CODEC_ID_PCM_S32BE:
+    case CODEC_ID_PCM_U32LE:
+    case CODEC_ID_PCM_U32BE: format = FORMAT_I32; break;
+    case CODEC_ID_PCM_F32LE:
+    case CODEC_ID_PCM_F32BE: format = FORMAT_F32; break;
+    default:                 return AVERROR(ENOSYS);
+    }
+
+    if      (layout == AV_CH_LAYOUT_5POINT0_BACK || layout == AV_CH_LAYOUT_5POINT0)
+        PICK_REORDER(50)
+    else if (layout == AV_CH_LAYOUT_5POINT1_BACK || layout == AV_CH_LAYOUT_5POINT1)
+        PICK_REORDER(51)
+    else if (layout == AV_CH_LAYOUT_7POINT1)
+        PICK_REORDER(71)
+
+    return s->reorder_func ? 0 : AVERROR(ENOSYS);
 }
 
 av_cold int ff_alsa_open(AVFormatContext *ctx, snd_pcm_stream_t mode,
@@ -286,22 +287,17 @@ av_cold int ff_alsa_open(AVFormatContext *ctx, snd_pcm_stream_t mode,
     snd_pcm_hw_params_free(hw_params);
 
     if (channels > 2 && layout) {
-        s->reorder_func = find_reorder_func(*codec_id, layout,
-                                            mode == SND_PCM_STREAM_PLAYBACK);
-        if (s->reorder_func == REORDER_DUMMY) {
-            s->reorder_func = NULL;
-        } else if (s->reorder_func) {
+        if (find_reorder_func(s, *codec_id, layout, mode == SND_PCM_STREAM_PLAYBACK) < 0) {
+            char name[128];
+            av_get_channel_layout_string(name, sizeof(name), channels, layout);
+            av_log(ctx, AV_LOG_WARNING, "ALSA channel layout unknown or unimplemented for %s %s.\n",
+                   name, mode == SND_PCM_STREAM_PLAYBACK ? "playback" : "capture");
+        }
+        if (s->reorder_func) {
             s->reorder_buf_size = buffer_size;
             s->reorder_buf = av_malloc(s->reorder_buf_size * s->frame_size);
             if (!s->reorder_buf)
                 goto fail1;
-        } else {
-            char name[32];
-            av_get_channel_layout_string(name, sizeof(name), channels, layout);
-            av_log(ctx, AV_LOG_WARNING,
-                   "ALSA channel layout unknown or unimplemented for %s %s.\n",
-                   name,
-                   mode == SND_PCM_STREAM_PLAYBACK ? "playback" : "capture");
         }
     }
 
@@ -350,6 +346,7 @@ int ff_alsa_extend_reorder_buf(AlsaData *s, int min_size)
     int size = s->reorder_buf_size;
     void *r;
 
+    av_assert0(size != 0);
     while (size < min_size)
         size *= 2;
     r = av_realloc(s->reorder_buf, size * s->frame_size);
diff --git a/libavdevice/alsa-audio.h b/libavdevice/alsa-audio.h
index 431401bb13..0f467e34c9 100644
--- a/libavdevice/alsa-audio.h
+++ b/libavdevice/alsa-audio.h
@@ -47,11 +47,11 @@ typedef struct {
     snd_pcm_t *h;
     int frame_size;  ///< preferred size for reads and writes
     int period_size; ///< bytes per sample * channels
-    ff_reorder_func reorder_func;
-    void *reorder_buf;
-    int reorder_buf_size; ///< in frames
     int sample_rate; ///< sample rate set by user
     int channels;    ///< number of channels set by user
+    void (*reorder_func)(const void *, void *, int);
+    void *reorder_buf;
+    int reorder_buf_size; ///< in frames
 } AlsaData;
 
 /**
diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index f6216e0aa3..9c1afe518b 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -248,7 +248,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     VideoData *s = s1->priv_data;
     AVStream *st;
     int width, height;
-    AVRational fps;
+    AVRational framerate;
     int ret = 0;
 
 #if FF_API_FORMAT_PARAMETERS
@@ -263,7 +263,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 #endif
 
     if ((ret = av_parse_video_size(&width, &height, s->video_size)) < 0) {
-        av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
+        av_log(s1, AV_LOG_ERROR, "Could not parse video size '%s'.\n", s->video_size);
         goto out;
     }
 
@@ -277,8 +277,8 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
             ret = AVERROR(EINVAL);
             goto out;
         }
-    if ((ret = av_parse_video_rate(&fps, s->framerate)) < 0) {
-        av_log(s1, AV_LOG_ERROR, "Couldn't parse framerate.\n");
+    if ((ret = av_parse_video_rate(&framerate, s->framerate)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Could not parse framerate '%s'.\n", s->framerate);
         goto out;
     }
 #if FF_API_FORMAT_PARAMETERS
@@ -287,7 +287,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     if (ap->height > 0)
         height = ap->height;
     if (ap->time_base.num)
-        fps = (AVRational){ap->time_base.den, ap->time_base.num};
+        framerate = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
 
     st = av_new_stream(s1, 0);
@@ -299,15 +299,15 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
 
     s->width = width;
     s->height = height;
-    s->per_frame = ((uint64_t)1000000 * fps.den) / fps.num;
+    s->per_frame = ((uint64_t)1000000 * framerate.den) / framerate.num;
 
     st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codec->pix_fmt = PIX_FMT_YUV420P;
     st->codec->codec_id = CODEC_ID_RAWVIDEO;
     st->codec->width = width;
     st->codec->height = height;
-    st->codec->time_base.den = fps.num;
-    st->codec->time_base.num = fps.den;
+    st->codec->time_base.den = framerate.num;
+    st->codec->time_base.num = framerate.den;
 
 
     if (bktr_init(s1->filename, width, height, s->standard,
diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c
index d5ba561db8..a10d8c48c1 100644
--- a/libavdevice/fbdev.c
+++ b/libavdevice/fbdev.c
@@ -79,7 +79,7 @@ static enum PixelFormat get_pixfmt_from_fb_varinfo(struct fb_var_screeninfo *var
 typedef struct {
     AVClass *class;          ///< class for private options
     int frame_size;          ///< size in bytes of a grabbed frame
-    AVRational fps;          ///< framerate
+    AVRational framerate_q;  ///< framerate
     char *framerate;         ///< framerate string set by a private option
     int64_t time_frame;      ///< time for the next frame to output (in 1/1000000 units)
 
@@ -102,14 +102,14 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx,
     enum PixelFormat pix_fmt;
     int ret, flags = O_RDONLY;
 
-    ret = av_parse_video_rate(&fbdev->fps, fbdev->framerate);
+    ret = av_parse_video_rate(&fbdev->framerate_q, fbdev->framerate);
     if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Couldn't parse framerate.\n");
+        av_log(avctx, AV_LOG_ERROR, "Could not parse framerate '%s'.\n", fbdev->framerate);
         return ret;
     }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->time_base.num)
-        fbdev->fps = (AVRational){ap->time_base.den, ap->time_base.num};
+        fbdev->framerate_q = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
 
     if (!(st = av_new_stream(avctx, 0)))
@@ -168,15 +168,15 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx,
     st->codec->width      = fbdev->width;
     st->codec->height     = fbdev->heigth;
     st->codec->pix_fmt    = pix_fmt;
-    st->codec->time_base  = (AVRational){fbdev->fps.den, fbdev->fps.num};
+    st->codec->time_base  = (AVRational){fbdev->framerate_q.den, fbdev->framerate_q.num};
     st->codec->bit_rate   =
-        fbdev->width * fbdev->heigth * fbdev->bytes_per_pixel * av_q2d(fbdev->fps) * 8;
+        fbdev->width * fbdev->heigth * fbdev->bytes_per_pixel * av_q2d(fbdev->framerate_q) * 8;
 
     av_log(avctx, AV_LOG_INFO,
            "w:%d h:%d bpp:%d pixfmt:%s fps:%d/%d bit_rate:%d\n",
            fbdev->width, fbdev->heigth, fbdev->varinfo.bits_per_pixel,
            av_pix_fmt_descriptors[pix_fmt].name,
-           fbdev->fps.num, fbdev->fps.den,
+           fbdev->framerate_q.num, fbdev->framerate_q.den,
            st->codec->bit_rate);
     return 0;
 
@@ -204,7 +204,7 @@ static int fbdev_read_packet(AVFormatContext *avctx, AVPacket *pkt)
                 "time_frame:%"PRId64" curtime:%"PRId64" delay:%"PRId64"\n",
                 fbdev->time_frame, curtime, delay);
         if (delay <= 0) {
-            fbdev->time_frame += INT64_C(1000000) / av_q2d(fbdev->fps);
+            fbdev->time_frame += INT64_C(1000000) / av_q2d(fbdev->framerate_q);
             break;
         }
         if (avctx->flags & AVFMT_FLAG_NONBLOCK)
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 7223654891..468c133f60 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -439,19 +439,19 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
     struct v4l2_streamparm streamparm = {0};
     struct v4l2_fract *tpf = &streamparm.parm.capture.timeperframe;
     int i, ret;
-    AVRational fps;
+    AVRational framerate_q = { 0, 1 }; /* num == 0: no rate requested, S_PARM is skipped */
 
     streamparm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
 
-    if (s->framerate && (ret = av_parse_video_rate(&fps, s->framerate)) < 0) {
-        av_log(s1, AV_LOG_ERROR, "Couldn't parse framerate.\n");
+    if (s->framerate && (ret = av_parse_video_rate(&framerate_q, s->framerate)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Could not parse framerate '%s'.\n", s->framerate);
         return ret;
     }
 #if FF_API_FORMAT_PARAMETERS
     if (ap->channel > 0)
         s->channel = ap->channel;
     if (ap->time_base.num)
-        fps = (AVRational){ap->time_base.den, ap->time_base.num};
+        framerate_q = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
 
     /* set tv video input */
     /* set tv video input */
@@ -500,23 +500,23 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
         }
     }
 
-    if (fps.num && fps.den) {
+    if (framerate_q.num && framerate_q.den) {
         av_log(s1, AV_LOG_DEBUG, "Setting time per frame to %d/%d\n",
-               fps.den, fps.num);
-        tpf->numerator   = fps.den;
-        tpf->denominator = fps.num;
+               framerate_q.den, framerate_q.num);
+        tpf->numerator   = framerate_q.den;
+        tpf->denominator = framerate_q.num;
         if (ioctl(s->fd, VIDIOC_S_PARM, &streamparm) != 0) {
             av_log(s1, AV_LOG_ERROR,
                    "ioctl set time per frame(%d/%d) failed\n",
-                   fps.den, fps.num);
+                   framerate_q.den, framerate_q.num);
             return AVERROR(EIO);
         }
 
-        if (fps.num != tpf->denominator ||
-            fps.den != tpf->numerator) {
+        if (framerate_q.num != tpf->denominator ||
+            framerate_q.den != tpf->numerator) {
             av_log(s1, AV_LOG_INFO,
                    "The driver changed the time per frame from %d/%d to %d/%d\n",
-                   fps.den, fps.num,
+                   framerate_q.den, framerate_q.num,
                    tpf->numerator, tpf->denominator);
         }
     } else {
@@ -581,7 +581,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */
 
     if (s->video_size && (res = av_parse_video_size(&s->width, &s->height, s->video_size)) < 0) {
-        av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
+        av_log(s1, AV_LOG_ERROR, "Could not parse video size '%s'.\n", s->video_size);
         goto out;
     }
     if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) {
diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index a8e67e7dda..7279817529 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -247,7 +247,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
     DWORD biCompression;
     WORD biBitCount;
     int ret;
-    AVRational fps;
+    AVRational framerate_q;
 
     if (!strcmp(s->filename, "list")) {
         for (devnum = 0; devnum <= 9; devnum++) {
@@ -267,7 +267,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
 
 #if FF_API_FORMAT_PARAMETERS
     if (ap->time_base.num)
-        fps = (AVRational){ap->time_base.den, ap->time_base.num};
+        framerate_q = (AVRational){ap->time_base.den, ap->time_base.num};
 #endif
 
     ctx->hwnd = capCreateCaptureWindow(NULL, 0, 0, 0, 0, 0, HWND_MESSAGE, 0);
@@ -367,7 +367,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
 
     cparms.fYield = 1; // Spawn a background thread
     cparms.dwRequestMicroSecPerFrame =
-                               (fps.den*1000000) / fps.num;
+                               (framerate_q.den*1000000) / framerate_q.num;
     cparms.fAbortLeftMouse = 0;
     cparms.fAbortRightMouse = 0;
     cparms.fCaptureAudio = 0;
@@ -379,7 +379,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap)
         goto fail_io;
 
     codec = st->codec;
-    codec->time_base = (AVRational){fps.den, fps.num};
+    codec->time_base = (AVRational){framerate_q.den, framerate_q.num};
     codec->codec_type = AVMEDIA_TYPE_VIDEO;
     codec->width  = bi->bmiHeader.biWidth;
     codec->height = bi->bmiHeader.biHeight;
diff --git a/libavformat/asfcrypt.c b/libavformat/asfcrypt.c
index 59986e0a2c..750758d822 100644
--- a/libavformat/asfcrypt.c
+++ b/libavformat/asfcrypt.c
@@ -28,9 +28,9 @@
 #include "asfcrypt.h"
 
 /**
- * \brief find multiplicative inverse modulo 2 ^ 32
- * \param v number to invert, must be odd!
- * \return number so that result * v = 1 (mod 2^32)
+ * @brief find multiplicative inverse modulo 2 ^ 32
+ * @param v number to invert, must be odd!
+ * @return number so that result * v = 1 (mod 2^32)
  */
 static uint32_t inverse(uint32_t v) {
     // v ^ 3 gives the inverse (mod 16), could also be implemented
@@ -45,9 +45,9 @@ static uint32_t inverse(uint32_t v) {
 }
 
 /**
- * \brief read keys from keybuf into keys
- * \param keybuf buffer containing the keys
- * \param keys output key array containing the keys for encryption in
+ * @brief read keys from keybuf into keys
+ * @param keybuf buffer containing the keys
+ * @param keys output key array containing the keys for encryption in
  *             native endianness
  */
 static void multiswap_init(const uint8_t keybuf[48], uint32_t keys[12]) {
@@ -57,9 +57,9 @@ static void multiswap_init(const uint8_t keybuf[48], uint32_t keys[12]) {
 }
 
 /**
- * \brief invert the keys so that encryption become decryption keys and
+ * @brief invert the keys so that encryption keys become decryption keys and
  *        the other way round.
- * \param keys key array of ints to invert
+ * @param keys key array of ints to invert
  */
 static void multiswap_invert_keys(uint32_t keys[12]) {
     int i;
@@ -92,12 +92,12 @@ static uint32_t multiswap_inv_step(const uint32_t keys[12], uint32_t v) {
 }
 
 /**
- * \brief "MultiSwap" encryption
- * \param keys 32 bit numbers in machine endianness,
+ * @brief "MultiSwap" encryption
+ * @param keys 32 bit numbers in machine endianness,
  *             0-4 and 6-10 must be inverted from decryption
- * \param key another key, this one must be the same for the decryption
- * \param data data to encrypt
- * \return encrypted data
+ * @param key another key, this one must be the same for the decryption
+ * @param data data to encrypt
+ * @return encrypted data
  */
 static uint64_t multiswap_enc(const uint32_t keys[12], uint64_t key, uint64_t data) {
     uint32_t a = data;
@@ -114,12 +114,12 @@ static uint64_t multiswap_enc(const uint32_t keys[12], uint64_t key, uint64_t da
 }
 
 /**
- * \brief "MultiSwap" decryption
- * \param keys 32 bit numbers in machine endianness,
+ * @brief "MultiSwap" decryption
+ * @param keys 32 bit numbers in machine endianness,
  *             0-4 and 6-10 must be inverted from encryption
- * \param key another key, this one must be the same as for the encryption
- * \param data data to decrypt
- * \return decrypted data
+ * @param key another key, this one must be the same as for the encryption
+ * @param data data to decrypt
+ * @return decrypted data
  */
 static uint64_t multiswap_dec(const uint32_t keys[12], uint64_t key, uint64_t data) {
     uint32_t a;
diff --git a/libavformat/avio.h b/libavformat/avio.h
index bf1ea30bb3..ed78ffaad9 100644
--- a/libavformat/avio.h
+++ b/libavformat/avio.h
@@ -285,11 +285,7 @@ attribute_deprecated int64_t url_fsize(AVIOContext *s);
 #define URL_EOF (-1)
 attribute_deprecated int url_fgetc(AVIOContext *s);
 attribute_deprecated int url_setbufsize(AVIOContext *s, int buf_size);
-#ifdef __GNUC__
-attribute_deprecated int url_fprintf(AVIOContext *s, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
-#else
-attribute_deprecated int url_fprintf(AVIOContext *s, const char *fmt, ...);
-#endif
+attribute_deprecated int url_fprintf(AVIOContext *s, const char *fmt, ...) av_printf_format(2, 3);
 attribute_deprecated void put_flush_packet(AVIOContext *s);
 attribute_deprecated int url_open_dyn_buf(AVIOContext **s);
 attribute_deprecated int url_open_dyn_packet_buf(AVIOContext **s, int max_packet_size);
@@ -463,11 +459,7 @@ int64_t avio_size(AVIOContext *s);
 int url_feof(AVIOContext *s);
 
 /** @warning currently size is limited */
-#ifdef __GNUC__
-int avio_printf(AVIOContext *s, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
-#else
-int avio_printf(AVIOContext *s, const char *fmt, ...);
-#endif
+int avio_printf(AVIOContext *s, const char *fmt, ...) av_printf_format(2, 3);
 
 void avio_flush(AVIOContext *s);
 
diff --git a/libavformat/gxf.c b/libavformat/gxf.c
index e278b9b846..7002a1d4ec 100644
--- a/libavformat/gxf.c
+++ b/libavformat/gxf.c
@@ -32,11 +32,11 @@ struct gxf_stream_info {
 };
 
 /**
- * \brief parses a packet header, extracting type and length
- * \param pb AVIOContext to read header from
- * \param type detected packet type is stored here
- * \param length detected packet length, excluding header is stored here
- * \return 0 if header not found or contains invalid data, 1 otherwise
+ * @brief parses a packet header, extracting type and length
+ * @param pb AVIOContext to read header from
+ * @param type detected packet type is stored here
+ * @param length detected packet length, excluding header is stored here
+ * @return 0 if header not found or contains invalid data, 1 otherwise
  */
 static int parse_packet_header(AVIOContext *pb, GXFPktType *type, int *length) {
     if (avio_rb32(pb))
@@ -58,7 +58,7 @@ static int parse_packet_header(AVIOContext *pb, GXFPktType *type, int *length) {
 }
 
 /**
- * \brief check if file starts with a PKT_MAP header
+ * @brief check if file starts with a PKT_MAP header
  */
 static int gxf_probe(AVProbeData *p) {
     static const uint8_t startcode[] = {0, 0, 0, 0, 1, 0xbc}; // start with map packet
@@ -70,10 +70,10 @@ static int gxf_probe(AVProbeData *p) {
 }
 
 /**
- * \brief gets the stream index for the track with the specified id, creates new
+ * @brief gets the stream index for the track with the specified id, creates new
  *        stream if not found
- * \param id     id of stream to find / add
- * \param format stream format identifier
+ * @param id     id of stream to find / add
+ * @param format stream format identifier
  */
 static int get_sindex(AVFormatContext *s, int id, int format) {
     int i;
@@ -153,9 +153,9 @@ static int get_sindex(AVFormatContext *s, int id, int format) {
 }
 
 /**
- * \brief filters out interesting tags from material information.
- * \param len length of tag section, will be adjusted to contain remaining bytes
- * \param si struct to store collected information into
+ * @brief filters out interesting tags from material information.
+ * @param len length of tag section, will be adjusted to contain remaining bytes
+ * @param si struct to store collected information into
  */
 static void gxf_material_tags(AVIOContext *pb, int *len, struct gxf_stream_info *si) {
     si->first_field = AV_NOPTS_VALUE;
@@ -179,9 +179,9 @@ static void gxf_material_tags(AVIOContext *pb, int *len, struct gxf_stream_info
 }
 
 /**
- * \brief convert fps tag value to AVRational fps
- * \param fps fps value from tag
- * \return fps as AVRational, or 0 / 0 if unknown
+ * @brief convert fps tag value to AVRational fps
+ * @param fps fps value from tag
+ * @return fps as AVRational, or 0 / 0 if unknown
  */
 static AVRational fps_tag2avr(int32_t fps) {
     extern const AVRational ff_frame_rate_tab[];
@@ -190,9 +190,9 @@ static AVRational fps_tag2avr(int32_t fps) {
 }
 
 /**
- * \brief convert UMF attributes flags to AVRational fps
- * \param flags UMF flags to convert
- * \return fps as AVRational, or 0 / 0 if unknown
+ * @brief convert UMF attributes flags to AVRational fps
+ * @param flags UMF flags to convert
+ * @return fps as AVRational, or 0 / 0 if unknown
  */
 static AVRational fps_umf2avr(uint32_t flags) {
     static const AVRational map[] = {{50, 1}, {60000, 1001}, {24, 1},
@@ -202,9 +202,9 @@ static AVRational fps_umf2avr(uint32_t flags) {
 }
 
 /**
- * \brief filters out interesting tags from track information.
- * \param len length of tag section, will be adjusted to contain remaining bytes
- * \param si struct to store collected information into
+ * @brief filters out interesting tags from track information.
+ * @param len length of tag section, will be adjusted to contain remaining bytes
+ * @param si struct to store collected information into
  */
 static void gxf_track_tags(AVIOContext *pb, int *len, struct gxf_stream_info *si) {
     si->frames_per_second = (AVRational){0, 0};
@@ -228,7 +228,7 @@ static void gxf_track_tags(AVIOContext *pb, int *len, struct gxf_stream_info *si
 }
 
 /**
- * \brief read index from FLT packet into stream 0 av_index
+ * @brief read index from FLT packet into stream 0 av_index
  */
 static void gxf_read_index(AVFormatContext *s, int pkt_len) {
     AVIOContext *pb = s->pb;
@@ -374,11 +374,11 @@ static int gxf_header(AVFormatContext *s, AVFormatParameters *ap) {
     }
 
 /**
- * \brief resync the stream on the next media packet with specified properties
- * \param max_interval how many bytes to search for matching packet at most
- * \param track track id the media packet must belong to, -1 for any
- * \param timestamp minimum timestamp (== field number) the packet must have, -1 for any
- * \return timestamp of packet found
+ * @brief resync the stream on the next media packet with specified properties
+ * @param max_interval how many bytes to search for matching packet at most
+ * @param track track id the media packet must belong to, -1 for any
+ * @param timestamp minimum timestamp (== field number) the packet must have, -1 for any
+ * @return timestamp of packet found
  */
 static int64_t gxf_resync_media(AVFormatContext *s, uint64_t max_interval, int track, int timestamp) {
     uint32_t tmp;
diff --git a/libavformat/internal.h b/libavformat/internal.h
index e0d04db43f..5636f78ec7 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -106,7 +106,7 @@ uint64_t ff_ntp_time(void);
  */
 int ff_url_join(char *str, int size, const char *proto,
                 const char *authorization, const char *hostname,
-                int port, const char *fmt, ...);
+                int port, const char *fmt, ...) av_printf_format(7, 8);
 
 /**
  * Append the media-specific SDP fragment for the media stream c
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 74c9ac931c..ad3cd824f4 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -232,11 +232,11 @@ static void set_pcr_pid(AVFormatContext *s, unsigned int programid, unsigned int
 }
 
 /**
- * \brief discard_pid() decides if the pid is to be discarded according
+ * @brief discard_pid() decides if the pid is to be discarded according
  *                      to caller's programs selection
- * \param ts    : - TS context
- * \param pid   : - pid
- * \return 1 if the pid is only comprised in programs that have .discard=AVDISCARD_ALL
+ * @param ts    : - TS context
+ * @param pid   : - pid
+ * @return 1 if the pid is only comprised in programs that have .discard=AVDISCARD_ALL
  *         0 otherwise
  */
 static int discard_pid(MpegTSContext *ts, unsigned int pid)
diff --git a/libavformat/nuv.c b/libavformat/nuv.c
index db31689753..a75f1d6d7f 100644
--- a/libavformat/nuv.c
+++ b/libavformat/nuv.c
@@ -49,11 +49,11 @@ static int nuv_probe(AVProbeData *p) {
 #define PKTSIZE(s) (s &  0xffffff)
 
 /**
- * \brief read until we found all data needed for decoding
- * \param vst video stream of which to change parameters
- * \param ast video stream of which to change parameters
- * \param myth set if this is a MythTVVideo format file
- * \return 1 if all required codec data was found
+ * @brief read until we found all data needed for decoding
+ * @param vst video stream of which to change parameters
+ * @param ast audio stream of which to change parameters
+ * @param myth set if this is a MythTVVideo format file
+ * @return 1 if all required codec data was found
  */
 static int get_codec_data(AVIOContext *pb, AVStream *vst,
                           AVStream *ast, int myth) {
diff --git a/libavformat/rtpproto.c b/libavformat/rtpproto.c
index 8b23f25c46..d4d7f3cb85 100644
--- a/libavformat/rtpproto.c
+++ b/libavformat/rtpproto.c
@@ -86,7 +86,7 @@ int rtp_set_remote_url(URLContext *h, const char *uri)
  * "http://host:port/path?option1=val1&option2=val2...
  */
 
-static void url_add_option(char *buf, int buf_size, const char *fmt, ...)
+static av_printf_format(3, 4) void url_add_option(char *buf, int buf_size, const char *fmt, ...)
 {
     char buf1[1024];
     va_list ap;
diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index 4130177549..efe3915350 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -36,6 +36,7 @@ static av_always_inline av_const int FASTDIV(int a, int b)
     int r;
     __asm__ ("cmp     %2, #2               \n\t"
              "ldr     %0, [%3, %2, lsl #2] \n\t"
+             "ite     le                   \n\t"
              "lsrle   %0, %1, #1           \n\t"
              "smmulgt %0, %0, %1           \n\t"
              : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc");
@@ -101,6 +102,7 @@ static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a)
 {
     int x, y;
     __asm__ ("adds   %1, %R2, %Q2, lsr #31  \n\t"
+             "itet   ne                     \n\t"
              "mvnne  %1, #1<<31             \n\t"
              "moveq  %0, %Q2                \n\t"
              "eorne  %0, %1,  %R2, asr #31  \n\t"
diff --git a/libavutil/attributes.h b/libavutil/attributes.h
index 517b129f37..e97fdfd466 100644
--- a/libavutil/attributes.h
+++ b/libavutil/attributes.h
@@ -127,8 +127,10 @@
 
 #ifdef __GNUC__
 #    define av_builtin_constant_p __builtin_constant_p
+#    define av_printf_format(fmtpos, attrpos) __attribute__((__format__(__printf__, fmtpos, attrpos)))
 #else
 #    define av_builtin_constant_p(x) 0
+#    define av_printf_format(fmtpos, attrpos)
 #endif
 
 #endif /* AVUTIL_ATTRIBUTES_H */
diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index 04d1197386..662af6b70d 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -22,6 +22,7 @@
 #define AVUTIL_AVSTRING_H
 
 #include <stddef.h>
+#include "attributes.h"
 
 /**
  * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
@@ -107,7 +108,7 @@ size_t av_strlcat(char *dst, const char *src, size_t size);
  * @return the length of the string that would have been generated
  *  if enough space had been available
  */
-size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...);
+size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
 
 /**
  * Convert a number to a av_malloced string.
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 4d6ef66003..33eacc7fae 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,8 +40,8 @@
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR  9
-#define LIBAVUTIL_VERSION_MICRO  1
+#define LIBAVUTIL_VERSION_MINOR 10
+#define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/common.h b/libavutil/common.h
index 1cd2de2909..d60e8638a5 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -270,16 +270,16 @@ static av_always_inline av_const int av_popcount_c(uint32_t x)
         }\
     }\
 
-/*!
- * \def PUT_UTF8(val, tmp, PUT_BYTE)
+/**
+ * @def PUT_UTF8(val, tmp, PUT_BYTE)
  * Convert a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
- * \param val is an input-only argument and should be of type uint32_t. It holds
+ * @param val is an input-only argument and should be of type uint32_t. It holds
  * a UCS-4 encoded Unicode character that is to be converted to UTF-8. If
  * val is given as a function it is executed only once.
- * \param tmp is a temporary variable and should be of type uint8_t. It
+ * @param tmp is a temporary variable and should be of type uint8_t. It
  * represents an intermediate value during conversion that is to be
  * output by PUT_BYTE.
- * \param PUT_BYTE writes the converted UTF-8 bytes to any proper destination.
+ * @param PUT_BYTE writes the converted UTF-8 bytes to any proper destination.
  * It could be a function or a statement, and uses tmp as the input byte.
  * For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be
  * executed up to 4 times for values in the valid UTF-8 range and up to
@@ -306,16 +306,16 @@ static av_always_inline av_const int av_popcount_c(uint32_t x)
         }\
     }
 
-/*!
- * \def PUT_UTF16(val, tmp, PUT_16BIT)
+/**
+ * @def PUT_UTF16(val, tmp, PUT_16BIT)
  * Convert a 32-bit Unicode character to its UTF-16 encoded form (2 or 4 bytes).
- * \param val is an input-only argument and should be of type uint32_t. It holds
+ * @param val is an input-only argument and should be of type uint32_t. It holds
  * a UCS-4 encoded Unicode character that is to be converted to UTF-16. If
  * val is given as a function it is executed only once.
- * \param tmp is a temporary variable and should be of type uint16_t. It
+ * @param tmp is a temporary variable and should be of type uint16_t. It
  * represents an intermediate value during conversion that is to be
  * output by PUT_16BIT.
- * \param PUT_16BIT writes the converted UTF-16 data to any proper destination
+ * @param PUT_16BIT writes the converted UTF-16 data to any proper destination
  * in desired endianness. It could be a function or a statement, and uses tmp
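- * as the input byte.  For example, PUT_BYTE could be "*output++ = tmp;"
- * PUT_BYTE will be executed 1 or 2 times depending on input character.
+ * as the input value.  For example, PUT_16BIT could be "*output++ = tmp;"
+ * PUT_16BIT will be executed 1 or 2 times depending on input character.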
diff --git a/libavutil/des.c b/libavutil/des.c
index 9c1a530666..f3482aebd6 100644
--- a/libavutil/des.c
+++ b/libavutil/des.c
@@ -240,7 +240,7 @@ static uint32_t f_func(uint32_t r, uint64_t k) {
 }
 
 /**
- * \brief rotate the two halves of the expanded 56 bit key each 1 bit left
+ * @brief rotate the two halves of the expanded 56 bit key each 1 bit left
  *
  * Note: the specification calls this "shift", so I kept it although
  * it is confusing.
diff --git a/libavutil/des.h b/libavutil/des.h
index e80bdd3e69..dd670869b2 100644
--- a/libavutil/des.h
+++ b/libavutil/des.h
@@ -30,22 +30,22 @@ struct AVDES {
 };
 
 /**
- * \brief Initializes an AVDES context.
+ * @brief Initializes an AVDES context.
  *
- * \param key_bits must be 64 or 192
- * \param decrypt 0 for encryption, 1 for decryption
+ * @param key_bits must be 64 or 192
+ * @param decrypt 0 for encryption, 1 for decryption
  */
 int av_des_init(struct AVDES *d, const uint8_t *key, int key_bits, int decrypt);
 
 /**
- * \brief Encrypts / decrypts using the DES algorithm.
+ * @brief Encrypts / decrypts using the DES algorithm.
  *
- * \param count number of 8 byte blocks
- * \param dst destination array, can be equal to src, must be 8-byte aligned
- * \param src source array, can be equal to dst, must be 8-byte aligned, may be NULL
- * \param iv initialization vector for CBC mode, if NULL then ECB will be used,
+ * @param count number of 8 byte blocks
+ * @param dst destination array, can be equal to src, must be 8-byte aligned
+ * @param src source array, can be equal to dst, must be 8-byte aligned, may be NULL
+ * @param iv initialization vector for CBC mode, if NULL then ECB will be used,
  *           must be 8-byte aligned
- * \param decrypt 0 for encryption, 1 for decryption
+ * @param decrypt 0 for encryption, 1 for decryption
  */
 void av_des_crypt(struct AVDES *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt);
 
diff --git a/libavutil/log.h b/libavutil/log.h
index 53c8aa0f67..046d19920e 100644
--- a/libavutil/log.h
+++ b/libavutil/log.h
@@ -23,6 +23,7 @@
 
 #include <stdarg.h>
 #include "avutil.h"
+#include "attributes.h"
 
 /**
  * Describe the class of an AVClass context structure. That is an
@@ -129,11 +130,7 @@ typedef struct {
  * subsequent arguments are converted to output.
  * @see av_vlog
  */
-#ifdef __GNUC__
-void av_log(void *avcl, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
-#else
-void av_log(void *avcl, int level, const char *fmt, ...);
-#endif
+void av_log(void *avcl, int level, const char *fmt, ...) av_printf_format(3, 4);
 
 void av_vlog(void *avcl, int level, const char *fmt, va_list);
 int av_log_get_level(void);
diff --git a/libavutil/lzo.c b/libavutil/lzo.c
index 40a41a424d..bac762ecc3 100644
--- a/libavutil/lzo.c
+++ b/libavutil/lzo.c
@@ -37,8 +37,8 @@ typedef struct LZOContext {
 } LZOContext;
 
 /**
- * \brief Reads one byte from the input buffer, avoiding an overrun.
- * \return byte read
+ * @brief Reads one byte from the input buffer, avoiding an overrun.
+ * @return byte read
  */
 static inline int get_byte(LZOContext *c) {
     if (c->in < c->in_end)
@@ -54,10 +54,10 @@ static inline int get_byte(LZOContext *c) {
 #endif
 
 /**
- * \brief Decodes a length value in the coding used by lzo.
- * \param x previous byte value
- * \param mask bits used from x
- * \return decoded length value
+ * @brief Decodes a length value in the coding used by lzo.
+ * @param x previous byte value
+ * @param mask bits used from x
+ * @return decoded length value
  */
 static inline int get_len(LZOContext *c, int x, int mask) {
     int cnt = x & mask;
@@ -82,8 +82,8 @@ static inline int get_len(LZOContext *c, int x, int mask) {
 #endif
 
 /**
- * \brief Copies bytes from input to output buffer with checking.
- * \param cnt number of bytes to copy, must be >= 0
+ * @brief Copies bytes from input to output buffer with checking.
+ * @param cnt number of bytes to copy, must be >= 0
  */
 static inline void copy(LZOContext *c, int cnt) {
     register const uint8_t *src = c->in;
@@ -111,9 +111,9 @@ static inline void copy(LZOContext *c, int cnt) {
 static inline void memcpy_backptr(uint8_t *dst, int back, int cnt);
 
 /**
- * \brief Copies previously decoded bytes to current position.
- * \param back how many bytes back we start
- * \param cnt number of bytes to copy, must be >= 0
+ * @brief Copies previously decoded bytes to current position.
+ * @param back how many bytes back we start
+ * @param cnt number of bytes to copy, must be >= 0
  *
  * cnt > back is valid, this will copy the bytes we just copied,
  * thus creating a repeating pattern with a period length of back.
diff --git a/libavutil/lzo.h b/libavutil/lzo.h
index 6788054bff..d1b8fa73ba 100644
--- a/libavutil/lzo.h
+++ b/libavutil/lzo.h
@@ -24,7 +24,7 @@
 
 #include <stdint.h>
 
-/** \defgroup errflags Error flags returned by av_lzo1x_decode
+/** @defgroup errflags Error flags returned by av_lzo1x_decode
   * \{ */
 //! end of the input buffer reached before decoding finished
 #define AV_LZO_INPUT_DEPLETED 1
@@ -40,12 +40,12 @@
 #define AV_LZO_OUTPUT_PADDING 12
 
 /**
- * \brief Decodes LZO 1x compressed data.
- * \param out output buffer
- * \param outlen size of output buffer, number of bytes left are returned here
- * \param in input buffer
- * \param inlen size of input buffer, number of bytes left are returned here
- * \return 0 on success, otherwise a combination of the error flags above
+ * @brief Decodes LZO 1x compressed data.
+ * @param out output buffer
+ * @param outlen size of output buffer, number of bytes left is returned here
+ * @param in input buffer
+ * @param inlen size of input buffer, number of bytes left is returned here
+ * @return 0 on success, otherwise a combination of the error flags above
  *
  * Make sure all buffers are appropriately padded, in must provide
  * AV_LZO_INPUT_PADDING, out must provide AV_LZO_OUTPUT_PADDING additional bytes.
@@ -53,10 +53,10 @@
 int av_lzo1x_decode(void *out, int *outlen, const void *in, int *inlen);
 
 /**
- * \brief deliberately overlapping memcpy implementation
- * \param dst destination buffer; must be padded with 12 additional bytes
- * \param back how many bytes back we start (the initial size of the overlapping window)
- * \param cnt number of bytes to copy, must be >= 0
+ * @brief deliberately overlapping memcpy implementation
+ * @param dst destination buffer; must be padded with 12 additional bytes
+ * @param back how many bytes back we start (the initial size of the overlapping window)
+ * @param cnt number of bytes to copy, must be >= 0
  *
  * cnt > back is valid, this will copy the bytes we just copied,
  * thus creating a repeating pattern with a period length of back.
diff --git a/libavutil/rc4.h b/libavutil/rc4.h
index 07223a5c9e..9362fd8880 100644
--- a/libavutil/rc4.h
+++ b/libavutil/rc4.h
@@ -29,21 +29,21 @@ struct AVRC4 {
 };
 
 /**
- * \brief Initializes an AVRC4 context.
+ * @brief Initializes an AVRC4 context.
  *
- * \param key_bits must be a multiple of 8
- * \param decrypt 0 for encryption, 1 for decryption, currently has no effect
+ * @param key_bits must be a multiple of 8
+ * @param decrypt 0 for encryption, 1 for decryption, currently has no effect
  */
 int av_rc4_init(struct AVRC4 *d, const uint8_t *key, int key_bits, int decrypt);
 
 /**
- * \brief Encrypts / decrypts using the RC4 algorithm.
+ * @brief Encrypts / decrypts using the RC4 algorithm.
  *
- * \param count number of bytes
- * \param dst destination array, can be equal to src
- * \param src source array, can be equal to dst, may be NULL
- * \param iv not (yet) used for RC4, should be NULL
- * \param decrypt 0 for encryption, 1 for decryption, not (yet) used
+ * @param count number of bytes
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst, may be NULL
+ * @param iv not (yet) used for RC4, should be NULL
+ * @param decrypt 0 for encryption, 1 for decryption, not (yet) used
  */
 void av_rc4_crypt(struct AVRC4 *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt);
 
diff --git a/subdir.mak b/subdir.mak
index 092662fc14..8b3807378f 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -1,4 +1,4 @@
-SRC_DIR := $(SRC_PATH_BARE)/lib$(NAME)
+SRC_DIR := $(SRC_PATH)/lib$(NAME)
 
 include $(SUBDIR)../common.mak
 
diff --git a/tests/Makefile b/tests/Makefile
index 1f7ba26aff..fb9b56e6b3 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -41,16 +41,16 @@ tests/seek_test$(EXESUF): tests/seek_test.o $(FF_DEP_LIBS)
 tools/lavfi-showfiltfmts$(EXESUF): tools/lavfi-showfiltfmts.o $(FF_DEP_LIBS)
 	$(LD) $(FF_LDFLAGS) -o $@ $< $(FF_EXTRALIBS)
 
-include $(SRC_PATH_BARE)/tests/fate.mak
-include $(SRC_PATH_BARE)/tests/fate2.mak
-
-include $(SRC_PATH_BARE)/tests/fate/aac.mak
-include $(SRC_PATH_BARE)/tests/fate/als.mak
-include $(SRC_PATH_BARE)/tests/fate/fft.mak
-include $(SRC_PATH_BARE)/tests/fate/h264.mak
-include $(SRC_PATH_BARE)/tests/fate/mp3.mak
-include $(SRC_PATH_BARE)/tests/fate/vorbis.mak
-include $(SRC_PATH_BARE)/tests/fate/vp8.mak
+include $(SRC_PATH)/tests/fate.mak
+include $(SRC_PATH)/tests/fate2.mak
+
+include $(SRC_PATH)/tests/fate/aac.mak
+include $(SRC_PATH)/tests/fate/als.mak
+include $(SRC_PATH)/tests/fate/fft.mak
+include $(SRC_PATH)/tests/fate/h264.mak
+include $(SRC_PATH)/tests/fate/mp3.mak
+include $(SRC_PATH)/tests/fate/vorbis.mak
+include $(SRC_PATH)/tests/fate/vp8.mak
 
 FATE_ACODEC  = $(ACODEC_TESTS:%=fate-acodec-%)
 FATE_VSYNTH1 = $(VCODEC_TESTS:%=fate-vsynth1-%)
diff --git a/tests/fate/vp8.mak b/tests/fate/vp8.mak
index 2b171305cc..1f442af4ac 100644
--- a/tests/fate/vp8.mak
+++ b/tests/fate/vp8.mak
@@ -3,7 +3,7 @@ VP8_SUITE = 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017
 define FATE_VP8_SUITE
 FATE_VP8 += fate-vp8-test-vector$(2)-$(1)
 fate-vp8-test-vector$(2)-$(1): CMD = framemd5 $(3) -i $(SAMPLES)/vp8-test-vectors-r1/vp80-00-comprehensive-$(1).ivf
-fate-vp8-test-vector$(2)-$(1): REF = $(SRC_PATH_BARE)/tests/ref/fate/vp8-test-vector-$(1)
+fate-vp8-test-vector$(2)-$(1): REF = $(SRC_PATH)/tests/ref/fate/vp8-test-vector-$(1)
 endef
 
 define FATE_VP8_FULL
@@ -11,7 +11,7 @@ $(foreach N,$(VP8_SUITE),$(eval $(call FATE_VP8_SUITE,$(N),$(1),$(2))))
 
 FATE_VP8 += fate-vp8-sign-bias$(1)
 fate-vp8-sign-bias$(1): CMD = framemd5 $(2) -i $(SAMPLES)/vp8/sintel-signbias.ivf
-fate-vp8-sign-bias$(1): REF = $(SRC_PATH_BARE)/tests/ref/fate/vp8-sign-bias
+fate-vp8-sign-bias$(1): REF = $(SRC_PATH)/tests/ref/fate/vp8-sign-bias
 endef
 
 $(eval $(call FATE_VP8_FULL))